diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index a0c81c74ca1e..89750d428b45 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -444,7 +444,7 @@ While most applications need less than a thousand maps, certain programs, particularly malloc debuggers, may consume lots of them, e.g., up to one or two maps per allocation. -The default value is 65536. +The default value is 65530. memory_failure_early_kill: diff --git a/Documentation/devicetree/bindings/arm/idle-states.yaml b/Documentation/devicetree/bindings/arm/idle-states.yaml index ea805c1e6b20..52bce5dbb11f 100644 --- a/Documentation/devicetree/bindings/arm/idle-states.yaml +++ b/Documentation/devicetree/bindings/arm/idle-states.yaml @@ -313,7 +313,7 @@ patternProperties: wakeup-latency-us by this duration. idle-state-name: - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string description: A string used as a descriptive name for the idle state. diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml index 0503651cd214..863a287ebc7e 100644 --- a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml +++ b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml @@ -34,7 +34,7 @@ properties: description: The SRAM that needs to be claimed to access the display engine bus. 
- $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array maxItems: 1 ranges: true diff --git a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml index 0bee4694578a..4ac78b44e45e 100644 --- a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml +++ b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml @@ -46,7 +46,7 @@ properties: const: 1 syscon: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the Baikal-T1 System Controller DT node interrupts: diff --git a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml index e709e530e17a..940486ef1051 100644 --- a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml +++ b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml @@ -29,18 +29,18 @@ properties: - const: fsl,imx8qxp-lpcg - items: - enum: - - fsl,imx8qm-lpcg + - fsl,imx8qm-lpcg - const: fsl,imx8qxp-lpcg - enum: - - fsl,imx8qxp-lpcg-adma - - fsl,imx8qxp-lpcg-conn - - fsl,imx8qxp-lpcg-dc - - fsl,imx8qxp-lpcg-dsp - - fsl,imx8qxp-lpcg-gpu - - fsl,imx8qxp-lpcg-hsio - - fsl,imx8qxp-lpcg-img - - fsl,imx8qxp-lpcg-lsio - - fsl,imx8qxp-lpcg-vpu + - fsl,imx8qxp-lpcg-adma + - fsl,imx8qxp-lpcg-conn + - fsl,imx8qxp-lpcg-dc + - fsl,imx8qxp-lpcg-dsp + - fsl,imx8qxp-lpcg-gpu + - fsl,imx8qxp-lpcg-hsio + - fsl,imx8qxp-lpcg-img + - fsl,imx8qxp-lpcg-lsio + - fsl,imx8qxp-lpcg-vpu deprecated: true reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/connector/usb-connector.yaml b/Documentation/devicetree/bindings/connector/usb-connector.yaml index a84464b3e1f2..4286ed767a0a 100644 --- a/Documentation/devicetree/bindings/connector/usb-connector.yaml +++ b/Documentation/devicetree/bindings/connector/usb-connector.yaml @@ -37,7 +37,7 @@ properties: description: Size of the connector, should be specified in 
case of non-fullsize 'usb-a-connector' or 'usb-b-connector' compatible connectors. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - mini @@ -67,7 +67,7 @@ properties: power-role: description: Determines the power role that the Type C connector will support. "dual" refers to Dual Role Port (DRP). - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - source @@ -76,7 +76,7 @@ properties: try-power-role: description: Preferred power role. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - source @@ -86,7 +86,7 @@ properties: data-role: description: Data role if Type C connector supports USB data. "dual" refers Dual Role Device (DRD). - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - host @@ -105,7 +105,7 @@ properties: Type-C Cable and Connector specification, when Power Delivery is not supported. allOf: - - $ref: /schemas/types.yaml#definitions/string + - $ref: /schemas/types.yaml#/definitions/string enum: - default - 1.5A diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml index 60585a4fc22b..9392b5502a32 100644 --- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml +++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml @@ -49,8 +49,8 @@ properties: Video port for panel or connector. 
required: - - port@0 - - port@1 + - port@0 + - port@1 required: - compatible diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml index 8c0e4f285fbc..fccd63521a8c 100644 --- a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml +++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml @@ -26,11 +26,9 @@ properties: description: GPIO connected to active low reset dvdd12-supply: - maxItems: 1 description: Regulator for 1.2V digital core power. dvdd25-supply: - maxItems: 1 description: Regulator for 2.5V digital core power. ports: diff --git a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml index ab5be2625224..35c9dfd86650 100644 --- a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml +++ b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml @@ -39,10 +39,10 @@ properties: properties: '#address-cells': - const: 1 + const: 1 '#size-cells': - const: 0 + const: 0 port@0: type: object diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml index efbb3d0117dc..02cfc0a3b550 100644 --- a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml @@ -35,11 +35,9 @@ properties: maxItems: 1 ovdd-supply: - maxItems: 1 description: I/O voltage pwr18-supply: - maxItems: 1 description: core voltage interrupts: diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml index e5e3c72630cf..66a14d60ce1d 100644 --- a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml +++ b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml @@ -79,8 +79,7 @@ properties: The GPIO used to 
control the power down line of this device. maxItems: 1 - power-supply: - maxItems: 1 + power-supply: true required: - compatible diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml index 7e27cfcf770d..763c7909473e 100644 --- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml @@ -35,11 +35,9 @@ properties: description: GPIO connected to active low reset. vdd12-supply: - maxItems: 1 description: Regulator for 1.2V digital core power. vdd33-supply: - maxItems: 1 description: Regulator for 3.3V digital core power. ports: diff --git a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml index 3ddb35fcf0a2..64e8a1c24b40 100644 --- a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml +++ b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml @@ -60,7 +60,6 @@ properties: description: GPIO controlling bridge enable vdd-supply: - maxItems: 1 description: Power supply for the bridge required: diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml index 469ac4a34273..3d5ce08a5792 100644 --- a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml +++ b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml @@ -74,7 +74,6 @@ properties: description: Power down GPIO signal, pin name "/PDWN", active low. vcc-supply: - maxItems: 1 description: Power supply for the TTL output, TTL CLOCKOUT signal, LVDS input, PLL and digital circuitry. 
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml index fd3113aa9ccd..b5959cc78b8d 100644 --- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml @@ -28,11 +28,9 @@ properties: description: i2c address of the bridge, 0x0f vdd-supply: - maxItems: 1 description: 1.2V LVDS Power Supply vddio-supply: - maxItems: 1 description: 1.8V IO Power Supply stby-gpios: diff --git a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml index 40caa6118809..a222b52d8b8f 100644 --- a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml +++ b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml @@ -18,8 +18,8 @@ description: | properties: compatible: items: - - const: intel,keembay-msscam - - const: syscon + - const: intel,keembay-msscam + - const: syscon reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml index 91cb4c3e0198..a108029ecfab 100644 --- a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml +++ b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml @@ -32,7 +32,7 @@ required: - power-supply - reset-gpios -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml index d2170de6b723..2f5df1d235ae 100644 --- a/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml +++ b/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml @@ -22,7 +22,7 @@ properties: compatible: items: - enum: - - tianma,fhd-video + - tianma,fhd-video - 
const: novatek,nt36672a description: This indicates the panel manufacturer of the panel that is in turn using the NT36672A panel driver. This compatible string diff --git a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml index 7b9d468c3e52..403d57977ee7 100644 --- a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml +++ b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml @@ -98,7 +98,6 @@ properties: maxItems: 1 dmas: - maxItems: 4 items: - description: Video layer, plane 0 (RGB or luma) - description: Video layer, plane 1 (U/V or U) diff --git a/Documentation/devicetree/bindings/dma/dma-common.yaml b/Documentation/devicetree/bindings/dma/dma-common.yaml index 307b499e8968..ad06d36af208 100644 --- a/Documentation/devicetree/bindings/dma/dma-common.yaml +++ b/Documentation/devicetree/bindings/dma/dma-common.yaml @@ -38,12 +38,12 @@ properties: maxItems: 255 dma-channels: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Number of DMA channels supported by the controller. dma-requests: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Number of DMA request signals supported by the controller. diff --git a/Documentation/devicetree/bindings/dma/dma-router.yaml b/Documentation/devicetree/bindings/dma/dma-router.yaml index 4cee5667b8a8..e72748496fd9 100644 --- a/Documentation/devicetree/bindings/dma/dma-router.yaml +++ b/Documentation/devicetree/bindings/dma/dma-router.yaml @@ -23,7 +23,7 @@ properties: pattern: "^dma-router(@.*)?$" dma-masters: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: Array of phandles to the DMA controllers the router can direct the signal to. 
diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml index 00f19b3cac31..6a2043721b95 100644 --- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml +++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml @@ -48,7 +48,7 @@ properties: ingenic,reserved-channels property. ingenic,reserved-channels: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: > Bitmask of channels to reserve for devices that need a specific channel. These channels will only be assigned when explicitely diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml index b548e4723936..c07eb6f2fc8d 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml @@ -73,7 +73,6 @@ properties: maxItems: 1 clock-names: - maxItems: 1 items: - const: fck diff --git a/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml b/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml index ef1d6879c158..6b35089ac017 100644 --- a/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml +++ b/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml @@ -54,7 +54,7 @@ properties: maximum: 16 dma-masters: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Number of DMA masters supported by the controller. In case if not specified the driver will try to auto-detect this and @@ -63,7 +63,7 @@ properties: maximum: 4 chan_allocation_order: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | DMA channels allocation order specifier. Zero means ascending order (first free allocated), while one - descending (last free allocated). 
@@ -71,7 +71,7 @@ properties: enum: [0, 1] chan_priority: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | DMA channels priority order. Zero means ascending channels priority so the very first channel has the highest priority. While 1 means @@ -80,7 +80,7 @@ properties: enum: [0, 1] block_size: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Maximum block size supported by the DMA controller. enum: [3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095] @@ -139,7 +139,7 @@ properties: default: 256 snps,dma-protection-control: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Bits one-to-one passed to the AHB HPROT[3:1] bus. Each bit setting indicates the following features: bit 0 - privileged mode, diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index 6edfa705b486..d5117c638b75 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -131,7 +131,7 @@ properties: default: 1 read-only: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Disables writes to the eeprom. @@ -141,7 +141,7 @@ properties: Total eeprom size in bytes. no-read-rollover: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Indicates that the multi-address eeprom does not automatically roll over reads to the next slave address. 
Please consult the manual of diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml index 744973637678..121a601db22e 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.yaml +++ b/Documentation/devicetree/bindings/eeprom/at25.yaml @@ -45,13 +45,13 @@ properties: spi-max-frequency: true pagesize: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072] description: Size of the eeprom page. size: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Total eeprom size in bytes. diff --git a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml index 6f3e3c01f717..b79f069a04c2 100644 --- a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml +++ b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml @@ -32,7 +32,7 @@ properties: PVT controller has 5 VM (voltage monitor) sensors. vm-map defines CPU core to VM instance mapping. A value of 0xff means that VM sensor is unused. - $ref: /schemas/types.yaml#definitions/uint8-array + $ref: /schemas/types.yaml#/definitions/uint8-array maxItems: 5 clocks: diff --git a/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml b/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml index c523a1beb2b7..7d49478d9668 100644 --- a/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml +++ b/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml @@ -29,12 +29,12 @@ properties: const: 0x70 sensirion,blocking-io: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, the driver hold the i2c bus until measurement is finished. 
sensirion,low-precision: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, the sensor aquire data with low precision (not recommended). The driver aquire data with high precision by default. diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml index c17e5d3ee3f1..8020d739a078 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml @@ -61,7 +61,7 @@ properties: Array of three(TMP513) or two(TMP512) n-Factor value for each remote temperature channel. See datasheet Table 11 for n-Factor range list and value interpretation. - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 maxItems: 3 items: diff --git a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml index 6a991e9f78e2..2716d4e95329 100644 --- a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml +++ b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml @@ -17,8 +17,7 @@ properties: - lltc,ltc2496 vref-supply: - description: phandle to an external regulator providing the reference voltage - $ref: /schemas/types.yaml#/definitions/phandle + description: Power supply for the reference voltage reg: description: spi chipselect number according to the usual spi bindings diff --git a/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml b/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml index 7037f82ec753..88384b69f917 100644 --- a/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml +++ b/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml @@ -22,8 +22,7 @@ properties: - ti,hdc2010 - ti,hdc2080 - vdd-supply: - maxItems: 1 + vdd-supply: true reg: maxItems: 1 diff --git 
a/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml b/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml index 4a9b2827cf7b..de5882cb3360 100644 --- a/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml +++ b/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml @@ -45,7 +45,7 @@ properties: default: 0x16 upisemi,continuous: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: | This chip has two power modes: one-shot (chip takes one measurement and then shuts itself down) and continuous (chip takes continuous diff --git a/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml b/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml index ccfb163f3d34..5de0bb2180e6 100644 --- a/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml +++ b/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml @@ -72,7 +72,7 @@ properties: - finest semtech,startup-sensor: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 2, 3] default: 0 description: @@ -81,7 +81,7 @@ properties: compensation. semtech,proxraw-strength: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 2, 4, 8] default: 2 description: @@ -89,7 +89,7 @@ properties: represent 1-1/N. 
semtech,avg-pos-strength: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 16, 64, 128, 256, 512, 1024, 4294967295] default: 16 description: diff --git a/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml b/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml index 378a85c09d34..878464f128dc 100644 --- a/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml +++ b/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml @@ -31,8 +31,7 @@ properties: interrupts: maxItems: 1 - vdd-supply: - maxItems: 1 + vdd-supply: true linux,keycodes: minItems: 1 diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml index 6966ab009fa3..060a309ff8e7 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.yaml +++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml @@ -34,13 +34,13 @@ patternProperties: linux,code: description: Key / Axis code to emit. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 linux,input-type: description: Specify event type this button/key generates. If not specified defaults to <1> == EV_KEY. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 default: 1 @@ -56,12 +56,12 @@ patternProperties: linux,input-value = <0xffffffff>; /* -1 */ - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 debounce-interval: description: Debouncing interval time in milliseconds. If not specified defaults to 5. 
- $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 default: 5 @@ -79,7 +79,7 @@ patternProperties: EV_ACT_ANY - both asserted and deasserted EV_ACT_ASSERTED - asserted EV_ACT_DEASSERTED - deasserted - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 2] linux,can-disable: @@ -118,7 +118,7 @@ then: poll-interval: description: Poll interval time in milliseconds - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 required: - poll-interval diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml index 4ce109476a0e..bfc3a8b5e118 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml @@ -55,8 +55,7 @@ properties: wakeup-source: true - vcc-supply: - maxItems: 1 + vcc-supply: true gain: description: Allows setting the sensitivity in the range from 0 to 31. diff --git a/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml b/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml index 039e08af98bb..91bb3c2307a7 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml @@ -42,7 +42,7 @@ properties: Specifies the list of CPU interrupt vectors to which the GIC may not route interrupts. This property is ignored if the CPU is started in EIC mode. - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 1 maxItems: 6 uniqueItems: true @@ -56,7 +56,7 @@ properties: It accepts two values: the 1st is the starting interrupt and the 2nd is the size of the reserved range. If not specified, the driver will allocate the last (2 * number of VPEs in the system). 
- $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array items: - minimum: 0 maximum: 254 diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml index 1c4c009dedd0..c2ce215501a5 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml @@ -80,7 +80,7 @@ properties: mapping is provided. ti,irqs-reserved: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: | Bitmask of host interrupts between 0 and 7 (corresponding to PRUSS INTC output interrupts 2 through 9) that are not connected to the Arm interrupt diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml index b5af12011499..3d89668573e8 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml @@ -76,7 +76,7 @@ properties: "limit" specifies the limit for translation ti,unmapped-event-sources: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: Array of phandles to DMA controllers where the unmapped events originate. diff --git a/Documentation/devicetree/bindings/leds/backlight/common.yaml b/Documentation/devicetree/bindings/leds/backlight/common.yaml index bc817f77d2b1..702ba350d869 100644 --- a/Documentation/devicetree/bindings/leds/backlight/common.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/common.yaml @@ -22,7 +22,7 @@ properties: The default brightness that should be applied to the LED by the operating system on start-up. The brightness should not exceed the brightness the LED can provide. 
- $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 max-brightness: description: @@ -31,6 +31,6 @@ properties: on the brightness apart from what the driver says, as it could happen that a LED can be made so bright that it gets damaged or causes damage due to restrictions in a specific system, such as mounting conditions. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 additionalProperties: true diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml index f1211e7045f1..b1f363747a62 100644 --- a/Documentation/devicetree/bindings/leds/common.yaml +++ b/Documentation/devicetree/bindings/leds/common.yaml @@ -27,21 +27,21 @@ properties: List of device current outputs the LED is connected to. The outputs are identified by the numbers that must be defined in the LED device binding documentation. - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array function: description: LED function. Use one of the LED_FUNCTION_* prefixed definitions from the header include/dt-bindings/leds/common.h. If there is no matching LED_FUNCTION available, add a new one. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string color: description: Color of the LED. Use one of the LED_COLOR_ID_* prefixed definitions from the header include/dt-bindings/leds/common.h. If there is no matching LED_COLOR_ID available, add a new one. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 minimum: 0 maximum: 9 @@ -49,7 +49,7 @@ properties: description: Integer to be used when more than one instance of the same function is needed, differing only with an ordinal number. 
- $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 label: description: @@ -66,7 +66,7 @@ properties: produced where the LED momentarily turns off (or on). The "keep" setting will keep the LED at whatever its current state is, without producing a glitch. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: - on - off @@ -77,7 +77,7 @@ properties: description: This parameter, if present, is a string defining the trigger assigned to the LED. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string enum: # LED will act as a back-light, controlled by the framebuffer system @@ -109,7 +109,7 @@ properties: brightness and duration (in ms). The exact format is described in: Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt - $ref: /schemas/types.yaml#definitions/uint32-matrix + $ref: /schemas/types.yaml#/definitions/uint32-matrix items: minItems: 2 maxItems: 2 @@ -143,7 +143,7 @@ properties: the device tree and be referenced by a phandle and a set of phandle arguments. A length of arguments should be specified by the #trigger-source-cells property in the source node. 
- $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array # Required properties for flash LED child nodes: flash-max-microamp: diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml b/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml index 58e974793a79..f552cd143d5b 100644 --- a/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml +++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml @@ -35,7 +35,7 @@ properties: description: I2C slave address clock-mode: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: | Input clock mode enum: @@ -49,7 +49,7 @@ properties: GPIO attached to the chip's enable pin pwr-sel: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: | LP8501 specific property. Power selection for output channels. enum: @@ -70,14 +70,14 @@ patternProperties: $ref: common.yaml# properties: led-cur: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: | Current setting at each LED channel (mA x10, 0 if LED is not connected) minimum: 0 maximum: 255 max-cur: - $ref: /schemas/types.yaml#definitions/uint8 + $ref: /schemas/types.yaml#/definitions/uint8 description: Maximun current at each LED channel. 
reg: @@ -97,7 +97,7 @@ patternProperties: - 8 # LED output D9 chan-name: - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string description: name of channel required: diff --git a/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml index d43791a2dde7..d07eb00b97c8 100644 --- a/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml +++ b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml @@ -61,7 +61,6 @@ properties: - description: low-priority non-secure - description: high-priority non-secure - description: Secure - maxItems: 3 clocks: maxItems: 1 diff --git a/Documentation/devicetree/bindings/media/coda.yaml b/Documentation/devicetree/bindings/media/coda.yaml index 7bac0057faf7..36781ee4617f 100644 --- a/Documentation/devicetree/bindings/media/coda.yaml +++ b/Documentation/devicetree/bindings/media/coda.yaml @@ -44,6 +44,21 @@ properties: - const: per - const: ahb + interrupts: + minItems: 1 + items: + - description: BIT processor interrupt + - description: JPEG unit interrupt + + interrupt-names: + minItems: 1 + items: + - const: bit + - const: jpeg + + power-domains: + maxItems: 1 + resets: maxItems: 1 @@ -59,6 +74,8 @@ required: - clocks - clock-names +additionalProperties: false + allOf: - if: properties: @@ -68,34 +85,17 @@ allOf: then: properties: interrupts: - items: - - description: BIT processor interrupt - - description: JPEG unit interrupt + minItems: 2 interrupt-names: - items: - - const: bit - - const: jpeg + minItems: 2 else: properties: interrupts: - items: - - description: BIT processor interrupt - - - if: - properties: - compatible: - contains: - enum: - - fsl,imx6dl-vpu - - fsl,imx6q-vpu - then: - properties: - power-domains: - $ref: /schemas/types.yaml#/definitions/phandle - description: phandle pointing to the PU power domain maxItems: 1 + power-domains: false + examples: - | vpu: video-codec@63ff4000 { diff --git 
a/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml b/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml index 9ea827092fdd..68ee8c7d9e79 100644 --- a/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml +++ b/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml @@ -40,7 +40,6 @@ properties: poc-supply: description: Regulator providing Power over Coax to the cameras - maxItems: 1 enable-gpios: description: GPIO connected to the \#PWDN pin with inverted polarity diff --git a/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml b/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml index d94bd67ccea1..bb3528315f20 100644 --- a/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml +++ b/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml @@ -1,6 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) # Copyright (C) 2014--2020 Intel Corporation - +%YAML 1.2 +--- $id: http://devicetree.org/schemas/media/i2c/mipi-ccs.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# @@ -26,11 +27,11 @@ properties: compatible: oneOf: - items: - - const: mipi-ccs-1.1 - - const: mipi-ccs + - const: mipi-ccs-1.1 + - const: mipi-ccs - items: - - const: mipi-ccs-1.0 - - const: mipi-ccs + - const: mipi-ccs-1.0 + - const: mipi-ccs - const: nokia,smia reg: @@ -38,15 +39,12 @@ properties: vana-supply: description: Analogue voltage supply (VANA), sensor dependent. - maxItems: 1 vcore-supply: description: Core voltage supply (VCore), sensor dependent. - maxItems: 1 vio-supply: description: I/O voltage supply (VIO), sensor dependent. - maxItems: 1 clocks: description: External clock to the sensor. 
diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml index 1a3590dd0e98..eb12526a462f 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml @@ -37,15 +37,12 @@ properties: vdddo-supply: description: Chip digital IO regulator (1.8V). - maxItems: 1 vdda-supply: description: Chip analog regulator (2.7V). - maxItems: 1 vddd-supply: description: Chip digital core regulator (1.12V). - maxItems: 1 flash-leds: description: See ../video-interfaces.txt diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml index f697e1a20beb..a66acb20d59b 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml @@ -33,15 +33,12 @@ properties: vana-supply: description: Sensor 2.8 V analog supply. - maxItems: 1 vdig-supply: description: Sensor 1.8 V digital core supply. - maxItems: 1 vddl-supply: description: Sensor digital IO 1.2 V supply. 
- maxItems: 1 port: type: object diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml index 888ab4b5df45..19e9afb385ac 100644 --- a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml @@ -26,8 +26,7 @@ properties: drive-open-drain: true - vdd-supply: - maxItems: 1 + vdd-supply: true pinctrl: type: object diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index c7c9ad4e3f9f..7f2578d48e3f 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -38,7 +38,7 @@ properties: const: stmmaceth syscon: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the device containing the EMAC or GMAC clock register @@ -114,7 +114,7 @@ allOf: then: properties: allwinner,leds-active-low: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: EPHY LEDs are active low. @@ -126,7 +126,7 @@ allOf: const: allwinner,sun8i-h3-mdio-mux mdio-parent-bus: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to EMAC MDIO. diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml index 6b057b117aa0..1f133f4a2924 100644 --- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml @@ -60,7 +60,7 @@ allOf: - const: timing-adjustment amlogic,tx-delay-ns: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The internal RGMII TX clock delay (provided by this driver) in nanoseconds. 
Allowed values are 0ns, 2ns, 4ns, 6ns. diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index 8e044631bcf7..8a3494db4d8d 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -54,7 +54,7 @@ patternProperties: description: Describes the label associated with this port, which will become the netdev name - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string link: description: @@ -62,13 +62,13 @@ patternProperties: port is used as the outgoing port towards the phandle ports. The full routing information must be given, not just the one hop routes to neighbouring switches - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array ethernet: description: Should be a phandle to a valid Ethernet device node. This host device is what the switch port is connected to - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle phy-handle: true diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index cc93063a8f39..0965f6515f9e 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -16,7 +16,7 @@ properties: local-mac-address: description: Specifies the MAC address that was assigned to the network device. - $ref: /schemas/types.yaml#definitions/uint8-array + $ref: /schemas/types.yaml#/definitions/uint8-array items: - minItems: 6 maxItems: 6 @@ -27,20 +27,20 @@ properties: program; should be used in cases where the MAC address assigned to the device by the boot program is different from the local-mac-address property. 
- $ref: /schemas/types.yaml#definitions/uint8-array + $ref: /schemas/types.yaml#/definitions/uint8-array items: - minItems: 6 maxItems: 6 max-frame-size: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Maximum transfer unit (IEEE defined MTU), rather than the maximum frame size (there\'s contradiction in the Devicetree Specification). max-speed: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Specifies maximum speed in Mbit/s supported by the device. @@ -101,7 +101,7 @@ properties: $ref: "#/properties/phy-connection-type" phy-handle: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Specifies a reference to a node representing a PHY device. @@ -114,7 +114,7 @@ properties: deprecated: true rx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The size of the controller\'s receive fifo in bytes. This is used for components that can have configurable receive fifo sizes, @@ -129,12 +129,12 @@ properties: If this property is present then the MAC applies the RX delay. sfp: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Specifies a reference to a node representing a SFP cage. tx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The size of the controller\'s transmit fifo in bytes. This is used for components that can have configurable fifo sizes. @@ -150,7 +150,7 @@ properties: description: Specifies the PHY management type. If auto is set and fixed-link is not specified, it uses MDIO for management. - $ref: /schemas/types.yaml#definitions/string + $ref: /schemas/types.yaml#/definitions/string default: auto enum: - auto @@ -198,17 +198,17 @@ properties: speed: description: Link speed. 
- $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [10, 100, 1000] full-duplex: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Indicates that full-duplex is used. When absent, half duplex is assumed. asym-pause: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Indicates that asym_pause should be enabled. diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 6dd72faebd89..2766fe45bb98 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -78,57 +78,57 @@ properties: Maximum PHY supported speed in Mbits / seconds. broken-turn-around: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, indicates the PHY device does not correctly release the turn around line low at end of the control phase of the MDIO transaction. enet-phy-lane-swap: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, indicates the PHY will swap the TX/RX lanes to compensate for the board being designed with the lanes swapped. eee-broken-100tx: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-1000t: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. 
eee-broken-10gt: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-1000kx: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-10gkx4: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. eee-broken-10gkr: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Mark the corresponding energy efficient ethernet mode as broken and request the ethernet to stop advertising it. phy-is-integrated: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, indicates that the PHY is integrated into the same physical package as the Ethernet MAC. If needed, muxers @@ -158,7 +158,7 @@ properties: this property is missing the delay will be skipped. sfp: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Specifies a reference to a node representing a SFP cage. 
diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml index 2159b7d1f537..7f620a71a972 100644 --- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml +++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml @@ -31,7 +31,7 @@ properties: phy-mode: true pcs-handle: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: A reference to a node representing a PCS PHY device found on the internal MDIO bus. diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml index e811e0fd851c..08e15fb1584f 100644 --- a/Documentation/devicetree/bindings/net/mdio.yaml +++ b/Documentation/devicetree/bindings/net/mdio.yaml @@ -70,7 +70,7 @@ patternProperties: The ID number for the device. broken-turn-around: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: If set, indicates the MDIO device does not correctly release the turn around line low at end of the control phase of the diff --git a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml index 0bbd598704e9..e6a5ff208253 100644 --- a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml +++ b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml @@ -42,7 +42,7 @@ properties: - const: trans mediatek,pericfg: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the device containing the PERICFG register range. This is used to control the MII mode. 
diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 4d8464b2676d..8a2d12644675 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -114,14 +114,13 @@ properties: validating firwmare used by the GSI. modem-remoteproc: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: This defines the phandle to the remoteproc node representing the modem subsystem. This is requied so the IPA driver can receive and act on notifications of modem up/down events. memory-region: - $ref: /schemas/types.yaml#/definitions/phandle-array maxItems: 1 description: If present, a phandle for a reserved memory area that holds diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 11a6fdb657c9..b2f6083f556a 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -126,7 +126,7 @@ properties: in a different mode than the PHY in order to function. snps,axi-config: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: AXI BUS Mode parameters. Phandle to a node that can contain the following properties @@ -141,7 +141,7 @@ properties: * snps,rb, rebuild INCRx Burst snps,mtl-rx-config: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Multiple RX Queues parameters. Phandle to a node that can contain the following properties @@ -164,7 +164,7 @@ properties: * snps,priority, RX queue priority (Range 0x0 to 0xF) snps,mtl-tx-config: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Multiple TX Queues parameters. 
Phandle to a node that can contain the following properties @@ -198,7 +198,7 @@ properties: snps,reset-active-low: deprecated: true - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Indicates that the PHY Reset is active low @@ -208,55 +208,55 @@ properties: Triplet of delays. The 1st cell is reset pre-delay in micro seconds. The 2nd cell is reset pulse in micro seconds. The 3rd cell is reset post-delay in micro seconds. - $ref: /schemas/types.yaml#definitions/uint32-array + $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 3 maxItems: 3 snps,aal: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Use Address-Aligned Beats snps,fixed-burst: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Program the DMA to use the fixed burst mode snps,mixed-burst: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Program the DMA to use the mixed burst mode snps,force_thresh_dma_mode: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Force DMA to use the threshold mode for both tx and rx snps,force_sf_dma_mode: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Force DMA to use the Store and Forward mode for both tx and rx. This flag is ignored if force_thresh_dma_mode is set. 
snps,en-tx-lpi-clockgating: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Enable gating of the MAC TX clock during TX low-power mode snps,multicast-filter-bins: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Number of multicast filter hash bins supported by this device instance snps,perfect-filter-entries: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Number of perfect filter entries supported by this device instance snps,ps-speed: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: Port selection speed that can be passed to the core when PCS is supported. For example, this is used in case of SGMII and @@ -307,25 +307,25 @@ allOf: snps,pbl: description: Programmable Burst Length (tx and rx) - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [2, 4, 8] snps,txpbl: description: Tx Programmable Burst Length. If set, DMA tx will use this value rather than snps,pbl. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [2, 4, 8] snps,rxpbl: description: Rx Programmable Burst Length. If set, DMA rx will use this value rather than snps,pbl. - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 enum: [2, 4, 8] snps,no-pbl-x8: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Don\'t multiply the pbl/txpbl/rxpbl values by 8. For core rev < 3.50, don\'t multiply the values by 4. @@ -351,7 +351,7 @@ allOf: then: properties: snps,tso: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Enables the TSO feature otherwise it will be managed by MAC HW capability register. 
diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml index cbacc04fc9e6..8a03a24a2019 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml @@ -64,7 +64,7 @@ properties: - const: ether # for others socionext,syscon-phy-mode: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: A phandle to syscon with one argument that configures phy mode. The argument is the ID of MAC instance. diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml index dadeb8f811c0..07a00f53adbf 100644 --- a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml +++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml @@ -70,7 +70,7 @@ properties: pinctrl-names: true syscon: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: Phandle to the system control device node which provides access to efuse IO range with MAC addresses diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.yaml b/Documentation/devicetree/bindings/net/ti,dp83867.yaml index 4050a3608658..047d757e8d82 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83867.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83867.yaml @@ -47,31 +47,31 @@ properties: takes precedence. 
tx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Transmitt FIFO depth see dt-bindings/net/ti-dp83867.h for values rx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Receive FIFO depth see dt-bindings/net/ti-dp83867.h for values ti,clk-output-sel: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Muxing option for CLK_OUT pin. See dt-bindings/net/ti-dp83867.h for applicable values. The CLK_OUT pin can also be disabled by this property. When omitted, the PHY's default will be left as is. ti,rx-internal-delay: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h for applicable values. Required only if interface type is PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_RXID. ti,tx-internal-delay: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h for applicable values. Required only if interface type is @@ -101,7 +101,7 @@ properties: ti,fifo-depth: deprecated: true - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h for applicable values. diff --git a/Documentation/devicetree/bindings/net/ti,dp83869.yaml b/Documentation/devicetree/bindings/net/ti,dp83869.yaml index c3235f08e326..70a1209cb13b 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83869.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83869.yaml @@ -44,22 +44,22 @@ properties: to a maximum value (70 ohms). 
tx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Transmitt FIFO depth see dt-bindings/net/ti-dp83869.h for values rx-fifo-depth: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Receive FIFO depth see dt-bindings/net/ti-dp83869.h for values ti,clk-output-sel: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Muxing option for CLK_OUT pin see dt-bindings/net/ti-dp83869.h for values. ti,op-mode: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Operational mode for the PHY. If this is not set then the operational mode is set by the straps. see dt-bindings/net/ti-dp83869.h for values diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index 227270cbf892..c47b58f3e3f6 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -119,12 +119,12 @@ properties: description: label associated with this port ti,mac-only: - $ref: /schemas/types.yaml#definitions/flag + $ref: /schemas/types.yaml#/definitions/flag description: Specifies the port works in mac-only mode. 
ti,syscon-efuse: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: Phandle to the system control device node which provides access to efuse IO range with MAC addresses diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml index 6af999191559..85c2f699d602 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml @@ -136,7 +136,7 @@ properties: - const: tcl2host-status-ring qcom,rproc: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: DT entry of q6v5-wcss remoteproc driver. Phandle to a node that can contain the following properties diff --git a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml index 83d5d0aceb04..cbbf5e8b1197 100644 --- a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml +++ b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml @@ -44,13 +44,13 @@ properties: - const: refclk syscon-phy-power: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: phandle/offset pair. Phandle to the system control module and register offset to power on/off the PHY. ctrl-module: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: (deprecated) phandle of the control module used by PHY driver to power on the PHY. Use syscon-phy-power instead. 
diff --git a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml index fd12bafe3548..d14cb9bac849 100644 --- a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml +++ b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml @@ -83,11 +83,11 @@ patternProperties: SUSBSYS clocks. mediatek,infracfg: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the INFRACFG register range. mediatek,smi: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the SMI register range. patternProperties: @@ -131,11 +131,11 @@ patternProperties: SUSBSYS clocks. mediatek,infracfg: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the INFRACFG register range. mediatek,smi: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the SMI register range. patternProperties: @@ -179,11 +179,11 @@ patternProperties: SUSBSYS clocks. mediatek,infracfg: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the INFRACFG register range. mediatek,smi: - $ref: /schemas/types.yaml#definitions/phandle + $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the SMI register range. 
required: diff --git a/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml b/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml index ee92e6a076ac..5fcdf5801536 100644 --- a/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml +++ b/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml @@ -27,7 +27,7 @@ properties: of this binary blob is kept secret by CellWise. The only way to obtain it is to mail two batteries to a test facility of CellWise and receive back a test report with the binary blob. - $ref: /schemas/types.yaml#definitions/uint8-array + $ref: /schemas/types.yaml#/definitions/uint8-array minItems: 64 maxItems: 64 diff --git a/Documentation/devicetree/bindings/powerpc/sleep.yaml b/Documentation/devicetree/bindings/powerpc/sleep.yaml index 6494c7d08b93..1b0936a5beec 100644 --- a/Documentation/devicetree/bindings/powerpc/sleep.yaml +++ b/Documentation/devicetree/bindings/powerpc/sleep.yaml @@ -42,6 +42,6 @@ select: true properties: sleep: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array additionalProperties: true diff --git a/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml b/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml index e7b3abe30363..0a66338c7e5a 100644 --- a/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml @@ -59,7 +59,6 @@ properties: description: u32 value representing regulator enable bit offset. vin-supply: - $ref: '/schemas/types.yaml#/definitions/phandle' description: input supply phandle. 
required: diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index c1d4c196f005..f54cae9ff7b2 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -126,7 +126,7 @@ properties: maxItems: 1 current-speed: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The current active speed of the UART. reg-offset: @@ -154,7 +154,7 @@ properties: Set to indicate that the port does not implement loopback test mode. fifo-size: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: The fifo size of the UART. auto-flow-control: @@ -165,7 +165,7 @@ properties: property. tx-threshold: - $ref: /schemas/types.yaml#definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32 description: | Specify the TX FIFO low water indication for parts with programmable TX FIFO thresholds. 
diff --git a/Documentation/devicetree/bindings/serial/litex,liteuart.yaml b/Documentation/devicetree/bindings/serial/litex,liteuart.yaml index bc79b3cca542..c4f1f489dc2d 100644 --- a/Documentation/devicetree/bindings/serial/litex,liteuart.yaml +++ b/Documentation/devicetree/bindings/serial/litex,liteuart.yaml @@ -29,6 +29,8 @@ required: - compatible - reg +additionalProperties: false + examples: - | uart0: serial@e0001800 { diff --git a/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml b/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml index e2b788796e79..c8b57c7fd08c 100644 --- a/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml +++ b/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml @@ -28,6 +28,8 @@ required: - compatible - reg +additionalProperties: false + examples: - | soc_ctrl0: soc-controller@f0000000 { diff --git a/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml b/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml index 6c763f873a63..31e4d3c339bf 100644 --- a/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml +++ b/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml @@ -44,6 +44,8 @@ required: - clocks - clock-names +additionalProperties: false + examples: - | #include diff --git a/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml b/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml index c3c595e235a8..ddea3d41971d 100644 --- a/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml +++ b/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml @@ -55,7 +55,7 @@ properties: description: TI-SCI RM subtype for GP ring range ti,sci: - $ref: /schemas/types.yaml#definitions/phandle-array + $ref: /schemas/types.yaml#/definitions/phandle-array description: phandle on TI-SCI compatible System controller node ti,sci-dev-id: diff --git a/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml 
b/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml index 378d0ced43c8..cb245f400287 100644 --- a/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml +++ b/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml @@ -26,9 +26,18 @@ required: - compatible - reg +additionalProperties: false + examples: - | - xlnx_vcu: vcu@a0041000 { - compatible = "xlnx,vcu-settings", "syscon"; - reg = <0x0 0xa0041000 0x0 0x1000>; + fpga { + #address-cells = <2>; + #size-cells = <2>; + + xlnx_vcu: vcu@a0041000 { + compatible = "xlnx,vcu-settings", "syscon"; + reg = <0x0 0xa0041000 0x0 0x1000>; + }; }; + +... diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml index be390accdd07..dd47fef9854d 100644 --- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml +++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml @@ -57,7 +57,7 @@ properties: A list of the connections between audio components. Each entry is a pair of strings, the first being the connection's sink, the second being the connection's source. 
- $ref: /schemas/types.yaml#definitions/non-unique-string-array + $ref: /schemas/types.yaml#/definitions/non-unique-string-array minItems: 2 maxItems: 18 items: diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml index e543a6123792..b55775e21de6 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml @@ -44,7 +44,6 @@ properties: maxItems: 3 clock-names: - maxItems: 3 items: - const: hda - const: hda2hdmi @@ -54,7 +53,6 @@ properties: maxItems: 3 reset-names: - maxItems: 3 items: - const: hda - const: hda2hdmi diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml index 6ad48c7681c1..f2443b651282 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml @@ -106,7 +106,7 @@ patternProperties: Must contain the phandle and index of the SAI sub-block providing the synchronization. allOf: - - $ref: /schemas/types.yaml#definitions/phandle-array + - $ref: /schemas/types.yaml#/definitions/phandle-array - maxItems: 1 st,iec60958: @@ -117,7 +117,7 @@ patternProperties: configured according to protocol defined in related DAI link node, such as i2s, left justified, right justified, dsp and pdm protocols. allOf: - - $ref: /schemas/types.yaml#definitions/flag + - $ref: /schemas/types.yaml#/definitions/flag "#clock-cells": description: Configure the SAI device as master clock provider. 
diff --git a/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml b/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml index 0f078bd0a3e5..22603256ddf8 100644 --- a/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml +++ b/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml @@ -51,7 +51,6 @@ properties: maxItems: 1 phy-names: - maxItems: 1 items: - const: usb diff --git a/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml b/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml index 737c1f47b7de..54c361d4a7af 100644 --- a/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml +++ b/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml @@ -74,11 +74,8 @@ properties: phys: maxItems: 1 - items: - - description: phandle + phy specifier pair. phy-names: - maxItems: 1 items: - const: usb diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml index e8f226376108..5ac607de8be4 100644 --- a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml @@ -21,6 +21,9 @@ properties: - items: - const: allwinner,sun50i-a64-wdt - const: allwinner,sun6i-a31-wdt + - items: + - const: allwinner,sun50i-a100-wdt + - const: allwinner,sun6i-a31-wdt - items: - const: allwinner,sun50i-h6-wdt - const: allwinner,sun6i-a31-wdt diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml index d9fc7bb851b1..f7ee9229c29f 100644 --- a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml @@ -14,7 +14,15 @@ maintainers: properties: compatible: - const: snps,dw-wdt + oneOf: + - const: snps,dw-wdt + - items: + - enum: + - rockchip,rk3066-wdt + - rockchip,rk3188-wdt + - rockchip,rk3288-wdt + - rockchip,rk3368-wdt + - 
const: snps,dw-wdt reg: maxItems: 1 diff --git a/Documentation/filesystems/ext4/journal.rst b/Documentation/filesystems/ext4/journal.rst index 849d5b119eb8..cdbfec473167 100644 --- a/Documentation/filesystems/ext4/journal.rst +++ b/Documentation/filesystems/ext4/journal.rst @@ -681,3 +681,53 @@ Here is the list of supported tags and their meanings: - Stores the TID of the commit, CRC of the fast commit of which this tag represents the end of +Fast Commit Replay Idempotence +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Fast commit tags are idempotent in nature provided the recovery code follows +certain rules. The guiding principle that the commit path follows while +committing is that it stores the result of a particular operation instead of +storing the procedure. + +Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a' +was associated with inode 10. During fast commit, instead of storing this +operation as a procedure "rename a to b", we store the resulting file system +state as a "series" of outcomes: + +- Link dirent b to inode 10 +- Unlink dirent a +- Inode 10 with valid refcount + +Now when recovery code runs, it needs to "enforce" this state on the file +system. This is what guarantees idempotence of fast commit replay. + +Let's take an example of a procedure that is not idempotent and see how fast +commits make it idempotent. Consider the following sequence of operations: + +1) rm A +2) mv B A +3) read A + +If we store this sequence of operations as is then the replay is not idempotent. +Let's say while in replay, we crash after (2). During the second replay, +file A (which was actually created as a result of "mv B A" operation) would get +deleted. Thus, file named A would be absent when we try to read A. So, this +sequence of operations is not idempotent. However, as mentioned above, instead +of storing the procedure, fast commits store the outcome of each procedure.
Thus, +the fast commit log for the above procedure would be as follows: + +(Let's assume dirent A was linked to inode 10 and dirent B was linked to +inode 11 before the replay) + +1) Unlink A +2) Link A to inode 11 +3) Unlink B +4) Inode 11 + +If we crash after (3) we will have file A linked to inode 11. During the second +replay, we will remove file A (inode 11). But we will create it back and make +it point to inode 11. We won't find B, so we'll just skip that step. At this +point, the refcount for inode 11 is not reliable, but that gets fixed by the +replay of the last inode 11 tag. Thus, by converting a non-idempotent procedure +into a series of idempotent outcomes, fast commits ensured idempotence during +the replay. diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst index 1879f881c300..230ee42f872f 100644 --- a/Documentation/process/submit-checklist.rst +++ b/Documentation/process/submit-checklist.rst @@ -75,44 +75,44 @@ and elsewhere regarding submitting Linux kernel patches. 13) Has been build- and runtime tested with and without ``CONFIG_SMP`` and ``CONFIG_PREEMPT.`` -16) All codepaths have been exercised with all lockdep features enabled. +14) All codepaths have been exercised with all lockdep features enabled. -17) All new ``/proc`` entries are documented under ``Documentation/`` +15) All new ``/proc`` entries are documented under ``Documentation/`` -18) All new kernel boot parameters are documented in +16) All new kernel boot parameters are documented in ``Documentation/admin-guide/kernel-parameters.rst``. -19) All new module parameters are documented with ``MODULE_PARM_DESC()`` +17) All new module parameters are documented with ``MODULE_PARM_DESC()`` -20) All new userspace interfaces are documented in ``Documentation/ABI/``. +18) All new userspace interfaces are documented in ``Documentation/ABI/``. See ``Documentation/ABI/README`` for more information.
Patches that change userspace interfaces should be CCed to linux-api@vger.kernel.org. -21) Check that it all passes ``make headers_check``. +19) Check that it all passes ``make headers_check``. -22) Has been checked with injection of at least slab and page-allocation +20) Has been checked with injection of at least slab and page-allocation failures. See ``Documentation/fault-injection/``. If the new code is substantial, addition of subsystem-specific fault injection might be appropriate. -23) Newly-added code has been compiled with ``gcc -W`` (use +21) Newly-added code has been compiled with ``gcc -W`` (use ``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good for finding bugs like "warning: comparison between signed and unsigned". -24) Tested after it has been merged into the -mm patchset to make sure +22) Tested after it has been merged into the -mm patchset to make sure that it still works with all of the other queued patches and various changes in the VM, VFS, and other subsystems. -25) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a +23) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a comment in the source code that explains the logic of what they are doing and why. -26) If any ioctl's are added by the patch, then also update +24) If any ioctl's are added by the patch, then also update ``Documentation/userspace-api/ioctl/ioctl-number.rst``. 
-27) If your modified source code depends on or uses any of the kernel +25) If your modified source code depends on or uses any of the kernel APIs or features that are related to the following ``Kconfig`` symbols, then test multiple builds with the related ``Kconfig`` symbols disabled and/or ``=m`` (if that option is available) [not all of these at the diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index fb8261a4be30..5ba54120bef7 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -411,6 +411,12 @@ Some people also put extra tags at the end. They'll just be ignored for now, but you can do this to mark internal company procedures or just point out some special detail about the sign-off. +Any further SoBs (Signed-off-by:'s) following the author's SoB are from +people handling and transporting the patch, but were not involved in its +development. SoB chains should reflect the **real** route a patch took +as it was propagated to the maintainers and ultimately to Linus, with +the first SoB entry signalling primary authorship of a single author. + When to use Acked-by:, Cc:, and Co-developed-by: ------------------------------------------------ @@ -446,7 +452,7 @@ patch. This tag documents that potentially interested parties have been included in the discussion. Co-developed-by: states that the patch was co-created by multiple developers; -it is a used to give attribution to co-authors (in addition to the author +it is used to give attribution to co-authors (in addition to the author attributed by the From: tag) when several people work on a single patch. Since Co-developed-by: denotes authorship, every Co-developed-by: must be immediately followed by a Signed-off-by: of the associated co-author. 
Standard sign-off diff --git a/MAINTAINERS b/MAINTAINERS index 82ea236107c9..a9ce0b44f210 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3199,6 +3199,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git F: block/ F: drivers/block/ +F: fs/block_dev.c F: include/linux/blk* F: kernel/trace/blktrace.c F: lib/sbitmap.c @@ -4354,8 +4355,8 @@ T: git git://linuxtv.org/media_tree.git F: drivers/media/pci/cobalt/ COCCINELLE/Semantic Patches (SmPL) -M: Julia Lawall -M: Gilles Muller +M: Julia Lawall +M: Gilles Muller M: Nicolas Palix M: Michal Marek L: cocci@systeme.lip6.fr (moderated for non-subscribers) diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index 0a2ab6cb18db..e5d870ff225f 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -7,7 +7,7 @@ * * Code supporting the Jensen. */ - +#include #include #include #include diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 3ee4f4381985..9de7ab2ce05d 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -66,24 +66,17 @@ static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) #define MAX_UNCOMP_KERNEL_SIZE SZ_32M /* - * phys-to-virt patching requires that the physical to virtual offset fits - * into the immediate field of an add/sub instruction, which comes down to the - * 24 least significant bits being zero, and so the offset should be a multiple - * of 16 MB. Since PAGE_OFFSET itself is a multiple of 16 MB, the physical - * base should be aligned to 16 MB as well. + * phys-to-virt patching requires that the physical to virtual offset is a + * multiple of 2 MiB. However, using an alignment smaller than TEXT_OFFSET + * here throws off the memory allocation logic, so let's use the lowest power + * of two greater than 2 MiB and greater than TEXT_OFFSET. 
*/ -#define EFI_PHYS_ALIGN SZ_16M - -/* on ARM, the FDT should be located in a lowmem region */ -static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) -{ - return round_down(image_addr, EFI_PHYS_ALIGN) + SZ_512M; -} +#define EFI_PHYS_ALIGN max(UL(SZ_2M), roundup_pow_of_two(TEXT_OFFSET)) /* on ARM, the initrd should be loaded in a lowmem region */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { - return round_down(image_addr, EFI_PHYS_ALIGN) + SZ_512M; + return round_down(image_addr, SZ_4M) + SZ_512M; } struct efi_arm_entry_state { @@ -93,4 +86,9 @@ struct efi_arm_entry_state { u32 sctlr_after_ebs; }; +static inline void efi_capsule_flush_cache_range(void *addr, int size) +{ + __cpuc_flush_dcache_area(addr, size); +} + #endif /* _ASM_ARM_EFI_H */ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cfdb9b37ec0a..0c7547474da9 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -553,7 +553,7 @@ void show_ipi_list(struct seq_file *p, int prec) seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); seq_printf(p, " %s\n", ipi_types[i]); } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 510cf517f007..0387476a1b48 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1876,6 +1876,7 @@ config EFI select EFI_RUNTIME_WRAPPERS select EFI_STUB select EFI_GENERIC_STUB + imply IMA_SECURE_AND_OR_TRUSTED_BOOT default y help This option provides support for runtime services provided diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 973b14415271..3578aba9c608 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -64,12 +64,6 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); #define EFI_KIMG_ALIGN \ (SEGMENT_ALIGN > THREAD_ALIGN ? 
SEGMENT_ALIGN : THREAD_ALIGN) -/* on arm64, the FDT may be located anywhere in system RAM */ -static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) -{ - return ULONG_MAX; -} - /* * On arm64, we have to ensure that the initrd ends up in the linear region, * which is a 1 GB aligned region of size '1UL << (VA_BITS_MIN - 1)' that is @@ -141,4 +135,9 @@ static inline void efi_set_pgd(struct mm_struct *mm) void efi_virtmap_load(void); void efi_virtmap_unload(void); +static inline void efi_capsule_flush_cache_range(void *addr, int size) +{ + __flush_dcache_area(addr, size); +} + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 69aa5619b661..c2cf7c0aaee0 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -868,7 +868,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); seq_printf(p, " %s\n", ipi_types[i]); } diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index e76c86619949..49cd6d2caefb 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -216,12 +216,9 @@ int show_interrupts(struct seq_file *p, void *v) if (!action) goto skip; seq_printf(p, "%3d: ", i); -#ifdef CONFIG_SMP + for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#else - seq_printf(p, "%10u ", kstat_irqs(i)); -#endif + seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, j)); seq_printf(p, " %14s", irq_desc_get_chip(desc)->name); #ifndef PARISC_IRQ_CR16_COUNTS diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ec0b2186e41c..2b8da923ceca 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -373,6 +373,8 @@ initrd-y := $(filter-out $(image-y), $(initrd-y)) targets += $(image-y) $(initrd-y) targets += $(foreach x, 
dtbImage uImage cuImage simpleImage treeImage, \ $(patsubst $(x).%, dts/%.dtb, $(filter $(x).%, $(image-y)))) +targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \ + $(patsubst $(x).%, dts/fsl/%.dtb, $(filter $(x).%, $(image-y)))) $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index cfa814824285..cc1bca571332 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -180,7 +180,12 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define VCPU_GPR(n) __VCPU_GPR(__REG_##n) #ifdef __KERNEL__ -#ifdef CONFIG_PPC64 + +/* + * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit + * version below in the else case of the ifdef. + */ +#ifdef __powerpc64__ #define STACKFRAMESIZE 256 #define __STK_REG(i) (112 + ((i)-14)*8) diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h index b558b07959ce..881f655caa0a 100644 --- a/arch/powerpc/include/asm/vdso/timebase.h +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -49,7 +49,7 @@ static inline unsigned long get_tbl(void) return mftb(); } -static inline u64 get_tb(void) +static __always_inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 541664d95702..a2f72c966baf 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -121,18 +121,28 @@ #ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SRR0 mtctr r11 -#endif + andi. r11, r9, MSR_PR + mr r11, r1 + lwz r1,TASK_STACK-THREAD(r12) + beq- 99f + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE + li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r10 + isync + tovirt(r12, r12) + stw r11,GPR1(r1) + stw r11,0(r1) + mr r11, r1 +#else andi. 
r11, r9, MSR_PR lwz r11,TASK_STACK-THREAD(r12) beq- 99f addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -#ifdef CONFIG_VMAP_STACK - li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r10 - isync + tophys(r11, r11) + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1, r11) /* set new kernel sp */ #endif - tovirt_vmstack r12, r12 - tophys_novmstack r11, r11 mflr r10 stw r10, _LINK(r11) #ifdef CONFIG_VMAP_STACK @@ -140,9 +150,6 @@ #else mfspr r10,SPRN_SRR0 #endif - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt_novmstack r1, r11 /* set new kernel sp */ stw r10,_NIP(r11) mfcr r10 rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2b9b1bb4c5f2..9e2246e80efd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -990,7 +990,7 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -static int init_big_cores(void) +static int __init init_big_cores(void) { int cpu; diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 59aa2944ecae..9cb6f524854b 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso, has to be asm only for now -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 include $(srctree)/lib/vdso/Makefile obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o @@ -27,7 +27,7 @@ endif CC32FLAGS := ifdef CONFIG_PPC64 CC32FLAGS += -m32 -KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 
-mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS)) endif targets := $(obj-vdso32) vdso32.so.dbg diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index d365810a689a..bf363ff37152 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # List of files in the vdso, has to be asm only for now -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 include $(srctree)/lib/vdso/Makefile obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 7542282f1141..6d98cd999680 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -27,12 +27,6 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) -/* on RISC-V, the FDT may be located anywhere in system RAM */ -static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) -{ - return ULONG_MAX; -} - /* Load initrd at enough distance from DRAM start */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 13ba533f462b..bf5379135e39 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -176,7 +176,7 @@ void __init setup_bootmem(void) * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed * as it is unusable by kernel. 
*/ - memblock_enforce_memory_limit(mem_start - PAGE_OFFSET); + memblock_enforce_memory_limit(-PAGE_OFFSET); /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 3514420f0259..f8a8b9428ae2 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -124,7 +124,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq) raw_spin_lock_irqsave(&desc->lock, flags); seq_printf(p, "%3d: ", irq); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu)); if (desc->irq_data.chip) seq_printf(p, " %8s", desc->irq_data.chip->name); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 40b8fd375d52..e0bc3988c3fa 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -35,7 +35,7 @@ cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += -mno-mmx -mno-sse -KBUILD_CFLAGS += -ffreestanding +KBUILD_CFLAGS += -ffreestanding -fshort-wchar KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index bc9758ef292e..c98f78330b09 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -213,8 +213,6 @@ static inline bool efi_is_64bit(void) static inline bool efi_is_native(void) { - if (!IS_ENABLED(CONFIG_X86_64)) - return true; return efi_is_64bit(); } @@ -382,4 +380,7 @@ static inline void efi_fake_memmap_early(void) } #endif +#define arch_ima_efi_boot_mode \ + ({ extern struct boot_params boot_params; boot_params.secure_boot; }) + #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile 
index 68608bd892c0..5eeb808eb024 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -161,5 +161,3 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o endif - -obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 0a2ec801b63f..f5477eab5692 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -25,6 +25,7 @@ * * Send feedback to */ +#include #include #include #include diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 205a9bc981b0..7d7ffb9c826a 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -93,37 +93,22 @@ static efi_system_table_t __init *xen_efi_probe(void) /* * Determine whether we're in secure boot mode. - * - * Please keep the logic in sync with - * drivers/firmware/efi/libstub/secureboot.c:efi_get_secureboot(). */ static enum efi_secureboot_mode xen_efi_get_secureboot(void) { - static efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; static efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID; + enum efi_secureboot_mode mode; efi_status_t status; - u8 moksbstate, secboot, setupmode; + u8 moksbstate; unsigned long size; - size = sizeof(secboot); - status = efi.get_variable(L"SecureBoot", &efi_variable_guid, - NULL, &size, &secboot); - - if (status == EFI_NOT_FOUND) - return efi_secureboot_mode_disabled; - - if (status != EFI_SUCCESS) - goto out_efi_err; - - size = sizeof(setupmode); - status = efi.get_variable(L"SetupMode", &efi_variable_guid, - NULL, &size, &setupmode); - - if (status != EFI_SUCCESS) - goto out_efi_err; - - if (secboot == 0 || setupmode == 1) - return efi_secureboot_mode_disabled; + mode = efi_get_secureboot_mode(efi.get_variable); + if (mode == efi_secureboot_mode_unknown) { + pr_err("Could not determine UEFI Secure Boot status.\n"); + return efi_secureboot_mode_unknown; + } + if (mode != efi_secureboot_mode_enabled) + return mode; /* See if a user has put 
the shim into insecure mode. */ size = sizeof(moksbstate); @@ -140,10 +125,6 @@ static enum efi_secureboot_mode xen_efi_get_secureboot(void) secure_boot_enabled: pr_info("UEFI Secure Boot is enabled.\n"); return efi_secureboot_mode_enabled; - - out_efi_err: - pr_err("Could not determine UEFI Secure Boot status.\n"); - return efi_secureboot_mode_unknown; } void __init xen_efi_init(struct boot_params *boot_params) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index ffa418c0dcb1..ac6078a34939 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2185,6 +2185,9 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) WEIGHT_ONE); } + TRACE_IOCG_PATH(iocg_idle, iocg, now, + atomic64_read(&iocg->active_period), + atomic64_read(&ioc->cur_period), vtime); __propagate_weights(iocg, 0, 0, false, now); list_del_init(&iocg->active_list); } diff --git a/block/blk-mq.c b/block/blk-mq.c index b09ce00cc6af..c338c9bc5a2c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -650,6 +650,14 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq) if (!IS_ENABLED(CONFIG_SMP) || !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) return false; + /* + * With force threaded interrupts enabled, raising softirq from an SMP + * function call will always result in waking the ksoftirqd thread. + * This is probably worse than completing the request on a different + * cache domain. + */ + if (force_irqthreads) + return false; /* same CPU or cache domain? Complete locally */ if (cpu == rq->mq_ctx->cpu || @@ -1495,31 +1503,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) { int srcu_idx; - /* - * We should be running this queue from one of the CPUs that - * are mapped to it. 
- * - * There are at least two related races now between setting - * hctx->next_cpu from blk_mq_hctx_next_cpu() and running - * __blk_mq_run_hw_queue(): - * - * - hctx->next_cpu is found offline in blk_mq_hctx_next_cpu(), - * but later it becomes online, then this warning is harmless - * at all - * - * - hctx->next_cpu is found online in blk_mq_hctx_next_cpu(), - * but later it becomes offline, then the warning can't be - * triggered, and we depend on blk-mq timeout handler to - * handle dispatched requests to this hctx - */ - if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) && - cpu_online(hctx->next_cpu)) { - printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n", - raw_smp_processor_id(), - cpumask_empty(hctx->cpumask) ? "inactive": "active"); - dump_stack(); - } - /* * We can't run the queue inline with ints disabled. Ensure that * we catch bad users of this early. diff --git a/block/genhd.c b/block/genhd.c index b84b8671e627..73faec438e49 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* * gendisk handling + * + * Portions Copyright (C) 2020 Christoph Hellwig */ #include diff --git a/block/partitions/core.c b/block/partitions/core.c index deca253583bd..e7d776db803b 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -2,6 +2,7 @@ /* * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King + * Copyright (C) 2020 Christoph Hellwig */ #include #include diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 442608220b5c..b11b08a60684 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -282,18 +283,19 @@ err: static union acpi_object *int_to_buf(union acpi_object *integer) { - union acpi_object *buf = ACPI_ALLOCATE(sizeof(*buf) + 4); + union acpi_object *buf = NULL; void *dst = NULL; - if (!buf) - goto err; - if (integer->type != 
ACPI_TYPE_INTEGER) { WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n", integer->type); goto err; } + buf = ACPI_ALLOCATE(sizeof(*buf) + 4); + if (!buf) + goto err; + dst = buf + 1; buf->type = ACPI_TYPE_BUFFER; buf->buffer.length = 4; @@ -478,8 +480,11 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_mask = nd_desc->cmd_mask; if (cmd == ND_CMD_CALL && call_pkg->nd_family) { family = call_pkg->nd_family; - if (!test_bit(family, &nd_desc->bus_family_mask)) + if (family > NVDIMM_BUS_FAMILY_MAX || + !test_bit(family, &nd_desc->bus_family_mask)) return -EINVAL; + family = array_index_nospec(family, + NVDIMM_BUS_FAMILY_MAX + 1); dsm_mask = acpi_desc->family_dsm_mask[family]; guid = to_nfit_bus_uuid(family); } else { diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 92f84ed0ba9e..6727358e147d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -318,7 +318,8 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, blk_queue_logical_block_size(nbd->disk->queue, blksize); blk_queue_physical_block_size(nbd->disk->queue, blksize); - set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); if (!set_capacity_and_notify(nbd->disk, bytesize >> 9)) kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); return 0; @@ -1476,9 +1477,11 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } else if (nbd_disconnected(nbd->config)) { - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } out: mutex_unlock(&nbd_index_mutex); diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index a7caeedeb198..d4aa6bfc9555 100644 --- 
a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -432,7 +432,7 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) * i.e. rnbd_clt_unmap_dev_store() leading to a sysfs warning because * of sysfs link already was removed already. */ - if (strlen(dev->blk_symlink_name) && try_module_get(THIS_MODULE)) { + if (dev->blk_symlink_name && try_module_get(THIS_MODULE)) { sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name); kfree(dev->blk_symlink_name); module_put(THIS_MODULE); @@ -521,7 +521,8 @@ static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) return 0; out_err: - dev->blk_symlink_name[0] = '\0'; + kfree(dev->blk_symlink_name); + dev->blk_symlink_name = NULL ; return ret; } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index a199b190c73d..96e3f9fe8241 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -88,6 +88,8 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, dev->discard_alignment = le32_to_cpu(rsp->discard_alignment); dev->secure_discard = le16_to_cpu(rsp->secure_discard); dev->rotational = rsp->rotational; + dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK); + dev->fua = !!(rsp->cache_policy & RNBD_FUA); dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; dev->max_segments = BMAX_SEGMENTS; @@ -347,19 +349,26 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu; struct rtrs_permit *permit; + iu = kzalloc(sizeof(*iu), GFP_KERNEL); + if (!iu) { + return NULL; + } + permit = rnbd_get_permit(sess, con_type, wait ? RTRS_PERMIT_WAIT : RTRS_PERMIT_NOWAIT); - if (unlikely(!permit)) + if (unlikely(!permit)) { + kfree(iu); return NULL; - iu = rtrs_permit_to_pdu(permit); + } + iu->permit = permit; /* * 1st reference is dropped after finishing sending a "user" message, * 2nd reference is dropped after confirmation with the response is * returned. 
* 1st and 2nd can happen in any order, so the rnbd_iu should be - * released (rtrs_permit returned to ibbtrs) only leased after both + * released (rtrs_permit returned to rtrs) only after both * are finished. */ atomic_set(&iu->refcount, 2); @@ -371,8 +380,10 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu) { - if (atomic_dec_and_test(&iu->refcount)) + if (atomic_dec_and_test(&iu->refcount)) { rnbd_put_permit(sess, iu->permit); + kfree(iu); + } } static void rnbd_softirq_done_fn(struct request *rq) @@ -382,6 +393,7 @@ static void rnbd_softirq_done_fn(struct request *rq) struct rnbd_iu *iu; iu = blk_mq_rq_to_pdu(rq); + sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); rnbd_put_permit(sess, iu->permit); blk_mq_end_request(rq, errno_to_blk_status(iu->errno)); } @@ -475,7 +487,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) iu->buf = NULL; iu->dev = dev; - sg_mark_end(&iu->sglist[0]); + sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE); msg.device_id = cpu_to_le32(device_id); @@ -490,6 +502,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) err = errno; } + sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -562,7 +575,8 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) iu->buf = rsp; iu->dev = dev; - sg_init_one(iu->sglist, rsp, sizeof(*rsp)); + sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); + sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN); msg.access_mode = dev->access_mode; @@ -570,7 +584,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) WARN_ON(!rnbd_clt_get_dev(dev)); err = send_usr_msg(sess->rtrs, READ, iu, - &vec, sizeof(*rsp), iu->sglist, 1, + &vec, sizeof(*rsp), iu->sgt.sgl, 1, msg_open_conf, &errno, wait); if (err) { rnbd_clt_put_dev(dev); @@ -580,6 +594,7 @@ static int 
send_msg_open(struct rnbd_clt_dev *dev, bool wait) err = errno; } + sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -608,7 +623,8 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) iu->buf = rsp; iu->sess = sess; - sg_init_one(iu->sglist, rsp, sizeof(*rsp)); + sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); + sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO); msg.ver = RNBD_PROTO_VER_MAJOR; @@ -624,7 +640,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) goto put_iu; } err = send_usr_msg(sess->rtrs, READ, iu, - &vec, sizeof(*rsp), iu->sglist, 1, + &vec, sizeof(*rsp), iu->sgt.sgl, 1, msg_sess_info_conf, &errno, wait); if (err) { rnbd_clt_put_sess(sess); @@ -634,7 +650,7 @@ put_iu: } else { err = errno; } - + sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -803,7 +819,7 @@ static struct rnbd_clt_session *alloc_sess(const char *sessname) rnbd_init_cpu_qlists(sess->cpu_queues); /* - * That is simple percpu variable which stores cpu indeces, which are + * That is simple percpu variable which stores cpu indices, which are * incremented on each access. We need that for the sake of fairness * to wake up queues in a round-robin manner. */ @@ -1014,11 +1030,10 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev, * See queue limits. 
*/ if (req_op(rq) != REQ_OP_DISCARD) - sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sglist); + sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sgt.sgl); if (sg_cnt == 0) - /* Do not forget to mark the end */ - sg_mark_end(&iu->sglist[0]); + sg_mark_end(&iu->sgt.sgl[0]); msg.hdr.type = cpu_to_le16(RNBD_MSG_IO); msg.device_id = cpu_to_le32(dev->device_id); @@ -1027,13 +1042,13 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev, .iov_base = &msg, .iov_len = sizeof(msg) }; - size = rnbd_clt_get_sg_size(iu->sglist, sg_cnt); + size = rnbd_clt_get_sg_size(iu->sgt.sgl, sg_cnt); req_ops = (struct rtrs_clt_req_ops) { .priv = iu, .conf_fn = msg_io_conf, }; err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit, - &vec, 1, size, iu->sglist, sg_cnt); + &vec, 1, size, iu->sgt.sgl, sg_cnt); if (unlikely(err)) { rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n", err); @@ -1120,6 +1135,7 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, struct rnbd_clt_dev *dev = rq->rq_disk->private_data; struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq); int err; + blk_status_t ret = BLK_STS_IOERR; if (unlikely(dev->dev_state != DEV_STATE_MAPPED)) return BLK_STS_IOERR; @@ -1131,32 +1147,35 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_RESOURCE; } + iu->sgt.sgl = iu->first_sgl; + err = sg_alloc_table_chained(&iu->sgt, + /* Even-if the request has no segment, + * sglist must have one entry at least */ + blk_rq_nr_phys_segments(rq) ? 
: 1, + iu->sgt.sgl, + RNBD_INLINE_SG_CNT); + if (err) { + rnbd_clt_err_rl(dev, "sg_alloc_table_chained ret=%d\n", err); + rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/); + rnbd_put_permit(dev->sess, iu->permit); + return BLK_STS_RESOURCE; + } + blk_mq_start_request(rq); err = rnbd_client_xfer_request(dev, rq, iu); if (likely(err == 0)) return BLK_STS_OK; if (unlikely(err == -EAGAIN || err == -ENOMEM)) { rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/); - rnbd_put_permit(dev->sess, iu->permit); - return BLK_STS_RESOURCE; + ret = BLK_STS_RESOURCE; } - + sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); rnbd_put_permit(dev->sess, iu->permit); - return BLK_STS_IOERR; -} - -static int rnbd_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) -{ - struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq); - - sg_init_table(iu->sglist, BMAX_SEGMENTS); - return 0; + return ret; } static struct blk_mq_ops rnbd_mq_ops = { .queue_rq = rnbd_queue_rq, - .init_request = rnbd_init_request, .complete = rnbd_softirq_done_fn, }; @@ -1170,7 +1189,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) tag_set->numa_node = NUMA_NO_NODE; tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_TAG_QUEUE_SHARED; - tag_set->cmd_size = sizeof(struct rnbd_iu); + tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE; tag_set->nr_hw_queues = num_online_cpus(); return blk_mq_alloc_tag_set(tag_set); @@ -1208,7 +1227,7 @@ find_and_get_or_create_sess(const char *sessname, */ sess->rtrs = rtrs_clt_open(&rtrs_ops, sessname, paths, path_cnt, port_nr, - sizeof(struct rnbd_iu), + 0, /* Do not use pdu of rtrs */ RECONNECT_DELAY, BMAX_SEGMENTS, BLK_MAX_SEGMENT_SIZE, MAX_RECONNECTS); @@ -1305,7 +1324,7 @@ static void setup_request_queue(struct rnbd_clt_dev *dev) blk_queue_max_segments(dev->queue, dev->max_segments); blk_queue_io_opt(dev->queue, dev->sess->max_io_size); blk_queue_virt_boundary(dev->queue, SZ_4K - 1); - blk_queue_write_cache(dev->queue, 
true, true); + blk_queue_write_cache(dev->queue, dev->wc, dev->fua); dev->queue->queuedata = dev; } @@ -1388,12 +1407,11 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, goto out_queues; } - dev->pathname = kzalloc(strlen(pathname) + 1, GFP_KERNEL); + dev->pathname = kstrdup(pathname, GFP_KERNEL); if (!dev->pathname) { ret = -ENOMEM; goto out_queues; } - strlcpy(dev->pathname, pathname, strlen(pathname) + 1); dev->clt_device_id = ret; dev->sess = sess; @@ -1529,13 +1547,13 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, } rnbd_clt_info(dev, - "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d)\n", + "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n", dev->gd->disk_name, dev->nsectors, dev->logical_block_size, dev->physical_block_size, dev->max_write_same_sectors, dev->max_discard_sectors, dev->discard_granularity, dev->discard_alignment, dev->secure_discard, dev->max_segments, - dev->max_hw_sectors, dev->rotational); + dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua); mutex_unlock(&dev->lock); @@ -1667,7 +1685,7 @@ static void rnbd_destroy_sessions(void) /* * Here at this point there is no any concurrent access to sessions * list and devices list: - * 1. New session or device can'be be created - session sysfs files + * 1. New session or device can't be created - session sysfs files * are removed. * 2. Device or session can't be removed - module reference is taken * into account in unmap device sysfs callback. 
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index b193d5904050..537d499dad3b 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -44,6 +44,13 @@ struct rnbd_iu_comp { int errno; }; +#ifdef CONFIG_ARCH_NO_SG_CHAIN +#define RNBD_INLINE_SG_CNT 0 +#else +#define RNBD_INLINE_SG_CNT 2 +#endif +#define RNBD_RDMA_SGL_SIZE (sizeof(struct scatterlist) * RNBD_INLINE_SG_CNT) + struct rnbd_iu { union { struct request *rq; /* for block io */ @@ -56,11 +63,12 @@ struct rnbd_iu { /* use to send msg associated with a sess */ struct rnbd_clt_session *sess; }; - struct scatterlist sglist[BMAX_SEGMENTS]; + struct sg_table sgt; struct work_struct work; int errno; struct rnbd_iu_comp comp; atomic_t refcount; + struct scatterlist first_sgl[]; /* must be the last one */ }; struct rnbd_cpu_qlist { @@ -112,6 +120,8 @@ struct rnbd_clt_dev { enum rnbd_access_mode access_mode; bool read_only; bool rotational; + bool wc; + bool fua; u32 max_hw_sectors; u32 max_write_same_sectors; u32 max_discard_sectors; diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h index ca166241452c..c1bc5c0fef71 100644 --- a/drivers/block/rnbd/rnbd-proto.h +++ b/drivers/block/rnbd/rnbd-proto.h @@ -108,6 +108,11 @@ struct rnbd_msg_close { __le32 device_id; }; +enum rnbd_cache_policy { + RNBD_FUA = 1 << 0, + RNBD_WRITEBACK = 1 << 1, +}; + /** * struct rnbd_msg_open_rsp - response message to RNBD_MSG_OPEN * @hdr: message header @@ -124,6 +129,7 @@ struct rnbd_msg_close { * @max_segments: max segments hardware support in one transfer * @secure_discard: supports secure discard * @rotation: is a rotational disc? + * @cache_policy: support write-back caching or FUA? 
*/ struct rnbd_msg_open_rsp { struct rnbd_msg_hdr hdr; @@ -139,7 +145,8 @@ struct rnbd_msg_open_rsp { __le16 max_segments; __le16 secure_discard; u8 rotational; - u8 reserved[11]; + u8 cache_policy; + u8 reserved[10]; }; /** diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index d1ee72ed8384..b8e44331e494 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -338,9 +338,10 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs, void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev) { + mutex_lock(&sess_dev->sess->lock); rnbd_srv_destroy_dev_session_sysfs(sess_dev); + mutex_unlock(&sess_dev->sess->lock); sess_dev->keep_id = true; - } static int process_msg_close(struct rtrs_srv *rtrs, @@ -549,6 +550,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, struct rnbd_srv_sess_dev *sess_dev) { struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev; + struct request_queue *q = bdev_get_queue(rnbd_dev->bdev); rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP); rsp->device_id = @@ -573,8 +575,12 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev)); rsp->secure_discard = cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev)); - rsp->rotational = - !blk_queue_nonrot(bdev_get_queue(rnbd_dev->bdev)); + rsp->rotational = !blk_queue_nonrot(q); + rsp->cache_policy = 0; + if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + rsp->cache_policy |= RNBD_WRITEBACK; + if (blk_queue_fua(q)) + rsp->cache_policy |= RNBD_FUA; } static struct rnbd_srv_sess_dev * diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 27513d311242..737b207c9e30 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -367,19 +367,28 @@ void kill_dev_dax(struct dev_dax *dev_dax) } EXPORT_SYMBOL_GPL(kill_dev_dax); -static void free_dev_dax_ranges(struct dev_dax *dev_dax) +static void trim_dev_dax_range(struct dev_dax *dev_dax) { + int i = dev_dax->nr_range - 1; + struct 
range *range = &dev_dax->ranges[i].range; struct dax_region *dax_region = dev_dax->region; - int i; device_lock_assert(dax_region->dev); - for (i = 0; i < dev_dax->nr_range; i++) { - struct range *range = &dev_dax->ranges[i].range; + dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i, + (unsigned long long)range->start, + (unsigned long long)range->end); - __release_region(&dax_region->res, range->start, - range_len(range)); + __release_region(&dax_region->res, range->start, range_len(range)); + if (--dev_dax->nr_range == 0) { + kfree(dev_dax->ranges); + dev_dax->ranges = NULL; } - dev_dax->nr_range = 0; +} + +static void free_dev_dax_ranges(struct dev_dax *dev_dax) +{ + while (dev_dax->nr_range) + trim_dev_dax_range(dev_dax); } static void unregister_dev_dax(void *dev) @@ -763,22 +772,14 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, return 0; } - ranges = krealloc(dev_dax->ranges, sizeof(*ranges) - * (dev_dax->nr_range + 1), GFP_KERNEL); - if (!ranges) + alloc = __request_region(res, start, size, dev_name(dev), 0); + if (!alloc) return -ENOMEM; - alloc = __request_region(res, start, size, dev_name(dev), 0); - if (!alloc) { - /* - * If this was an empty set of ranges nothing else - * will release @ranges, so do it now. 
- */ - if (!dev_dax->nr_range) { - kfree(ranges); - ranges = NULL; - } - dev_dax->ranges = ranges; + ranges = krealloc(dev_dax->ranges, sizeof(*ranges) + * (dev_dax->nr_range + 1), GFP_KERNEL); + if (!ranges) { + __release_region(res, alloc->start, resource_size(alloc)); return -ENOMEM; } @@ -804,15 +805,10 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, return 0; rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1); - if (rc) { - dev_dbg(dev, "delete range[%d]: %pa:%pa\n", dev_dax->nr_range - 1, - &alloc->start, &alloc->end); - dev_dax->nr_range--; - __release_region(res, alloc->start, resource_size(alloc)); - return rc; - } + if (rc) + trim_dev_dax_range(dev_dax); - return 0; + return rc; } static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size) @@ -885,12 +881,7 @@ static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size) if (shrink >= range_len(range)) { devm_release_action(dax_region->dev, unregister_dax_mapping, &mapping->dev); - __release_region(&dax_region->res, range->start, - range_len(range)); - dev_dax->nr_range--; - dev_dbg(dev, "delete range[%d]: %#llx:%#llx\n", i, - (unsigned long long) range->start, - (unsigned long long) range->end); + trim_dev_dax_range(dev_dax); to_shrink -= shrink; if (!to_shrink) break; @@ -1114,16 +1105,9 @@ static ssize_t align_show(struct device *dev, static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax) { - resource_size_t dev_size = dev_dax_size(dev_dax); struct device *dev = &dev_dax->dev; int i; - if (dev_size > 0 && !alloc_is_aligned(dev_dax, dev_size)) { - dev_dbg(dev, "%s: align %u invalid for size %pa\n", - __func__, dev_dax->align, &dev_size); - return -EINVAL; - } - for (i = 0; i < dev_dax->nr_range; i++) { size_t len = range_len(&dev_dax->ranges[i].range); @@ -1274,7 +1258,6 @@ static void dev_dax_release(struct device *dev) put_dax(dax_dev); free_dev_dax_id(dev_dax); dax_region_put(dax_region); - 
kfree(dev_dax->ranges); kfree(dev_dax->pgmap); kfree(dev_dax); } diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c index 62b26bfceab1..062e8bc14223 100644 --- a/drivers/dax/pmem/core.c +++ b/drivers/dax/pmem/core.c @@ -52,7 +52,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) /* adjust the dax_region range to the start of data */ range = pgmap.range; - range.start += offset, + range.start += offset; dax_region = alloc_dax_region(dev, region_id, &range, nd_region->target_node, le32_to_cpu(pfn_sb->align), IORESOURCE_DAX_STATIC); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index edc279be3e59..cadbd0a1a1ef 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -752,6 +752,7 @@ err_chrdev: static void __exit dax_core_exit(void) { + dax_bus_exit(); unregister_chrdev_region(dax_devt, MINORMASK+1); ida_destroy(&dax_minor_ida); dax_fs_exit(); diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index ce49871e6f3e..b8fcc2e004ab 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -20,6 +20,7 @@ #include #include #include +#include struct cma_heap { diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index d9895491ff34..2c3dac5ecb36 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -122,7 +122,7 @@ config EFI_ARMSTUB_DTB_LOADER config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER bool "Enable the command line initrd loader" if !X86 depends on EFI_STUB && (EFI_GENERIC_STUB || X86) - default y + default y if X86 depends on !RISCV help Select this config option to add support for the initrd= command @@ -147,7 +147,7 @@ config EFI_BOOTLOADER_CONTROL config EFI_CAPSULE_LOADER tristate "EFI capsule loader" - depends on EFI + depends on EFI && !IA64 help This option exposes a loader interface "/dev/efi_capsule_loader" for users to load EFI capsules. 
This driver requires working runtime diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index d6ca2da19339..467e94259679 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -12,7 +12,10 @@ KASAN_SANITIZE_runtime-wrappers.o := n obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o tpm.o -obj-$(CONFIG_EFI) += capsule.o memmap.o +obj-$(CONFIG_EFI) += memmap.o +ifneq ($(CONFIG_EFI_CAPSULE_LOADER),) +obj-$(CONFIG_EFI) += capsule.o +endif obj-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdtparams.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_ESRT) += esrt.o diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c index 598b7800d14e..768430293669 100644 --- a/drivers/firmware/efi/capsule.c +++ b/drivers/firmware/efi/capsule.c @@ -12,6 +12,7 @@ #include #include #include +#include #include typedef struct { @@ -244,7 +245,7 @@ int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages) for (i = 0; i < sg_count; i++) { efi_capsule_block_desc_t *sglist; - sglist = kmap(sg_pages[i]); + sglist = kmap_atomic(sg_pages[i]); for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) { u64 sz = min_t(u64, imagesize, @@ -265,7 +266,18 @@ int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages) else sglist[j].data = page_to_phys(sg_pages[i + 1]); - kunmap(sg_pages[i]); +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + /* + * At runtime, the firmware has no way to find out where the + * sglist elements are mapped, if they are mapped in the first + * place. Therefore, on architectures that can only perform + * cache maintenance by virtual address, the firmware is unable + * to perform this maintenance, and so it is up to the OS to do + * it instead. 
+ */ + efi_capsule_flush_cache_range(sglist, PAGE_SIZE); +#endif + kunmap_atomic(sglist); } mutex_lock(&capsule_mutex); diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index 914a343c7785..ec2f3985bef3 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -273,7 +273,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, install_memreserve_table(); status = allocate_new_fdt_and_exit_boot(handle, &fdt_addr, - efi_get_max_fdt_addr(image_addr), initrd_addr, initrd_size, cmdline_ptr, fdt_addr, fdt_size); if (status != EFI_SUCCESS) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 2d7abcd99de9..b50a6c67d9bd 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -750,7 +750,6 @@ efi_status_t efi_exit_boot_services(void *handle, efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long *new_fdt_addr, - unsigned long max_addr, u64 initrd_addr, u64 initrd_size, char *cmdline_ptr, unsigned long fdt_addr, @@ -848,4 +847,6 @@ asmlinkage void __noreturn efi_enter_kernel(unsigned long entrypoint, void efi_handle_post_ebs_state(void); +enum efi_secureboot_mode efi_get_secureboot(void); + #endif diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 368cd60000ee..365c3a43a198 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -238,7 +238,6 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long *new_fdt_addr, - unsigned long max_addr, u64 initrd_addr, u64 initrd_size, char *cmdline_ptr, unsigned long fdt_addr, @@ -275,7 +274,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, efi_info("Exiting boot services and installing virtual address map...\n"); map.map = &memory_map; - status = 
efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, max_addr); + status = efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, ULONG_MAX); if (status != EFI_SUCCESS) { efi_err("Unable to allocate memory for new device tree.\n"); goto fail; diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 5efc524b14be..8a18930f3eb6 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -12,44 +12,34 @@ #include "efistub.h" -/* BIOS variables */ -static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; -static const efi_char16_t efi_SecureBoot_name[] = L"SecureBoot"; -static const efi_char16_t efi_SetupMode_name[] = L"SetupMode"; - /* SHIM variables */ static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID; static const efi_char16_t shim_MokSBState_name[] = L"MokSBState"; +static efi_status_t get_var(efi_char16_t *name, efi_guid_t *vendor, u32 *attr, + unsigned long *data_size, void *data) +{ + return get_efi_var(name, vendor, attr, data_size, data); +} + /* * Determine whether we're in secure boot mode. - * - * Please keep the logic in sync with - * arch/x86/xen/efi.c:xen_efi_get_secureboot(). 
*/ enum efi_secureboot_mode efi_get_secureboot(void) { u32 attr; - u8 secboot, setupmode, moksbstate; unsigned long size; + enum efi_secureboot_mode mode; efi_status_t status; + u8 moksbstate; - size = sizeof(secboot); - status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid, - NULL, &size, &secboot); - if (status == EFI_NOT_FOUND) - return efi_secureboot_mode_disabled; - if (status != EFI_SUCCESS) - goto out_efi_err; - - size = sizeof(setupmode); - status = get_efi_var(efi_SetupMode_name, &efi_variable_guid, - NULL, &size, &setupmode); - if (status != EFI_SUCCESS) - goto out_efi_err; - - if (secboot == 0 || setupmode == 1) - return efi_secureboot_mode_disabled; + mode = efi_get_secureboot_mode(get_var); + if (mode == efi_secureboot_mode_unknown) { + efi_err("Could not determine UEFI Secure Boot status.\n"); + return efi_secureboot_mode_unknown; + } + if (mode != efi_secureboot_mode_enabled) + return mode; /* * See if a user has put the shim into insecure mode. If so, and if the @@ -69,8 +59,4 @@ enum efi_secureboot_mode efi_get_secureboot(void) secure_boot_enabled: efi_info("UEFI Secure Boot is enabled.\n"); return efi_secureboot_mode_enabled; - -out_efi_err: - efi_err("Could not determine UEFI Secure Boot status.\n"); - return efi_secureboot_mode_unknown; } diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 3672539cb96e..f14c4ff5839f 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -715,8 +715,11 @@ unsigned long efi_main(efi_handle_t handle, (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || (image_offset == 0)) { + extern char _bss[]; + status = efi_relocate_kernel(&bzimage_addr, - hdr->init_size, hdr->init_size, + (unsigned long)_bss - bzimage_addr, + hdr->init_size, hdr->pref_address, hdr->kernel_alignment, LOAD_PHYSICAL_ADDR); diff --git 
a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c index ddf9eae396fe..47d67bb0a516 100644 --- a/drivers/firmware/efi/test/efi_test.c +++ b/drivers/firmware/efi/test/efi_test.c @@ -663,6 +663,19 @@ out: return rv; } +static long efi_runtime_get_supported_mask(unsigned long arg) +{ + unsigned int __user *supported_mask; + int rv = 0; + + supported_mask = (unsigned int *)arg; + + if (put_user(efi.runtime_supported_mask, supported_mask)) + rv = -EFAULT; + + return rv; +} + static long efi_test_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -699,6 +712,9 @@ static long efi_test_ioctl(struct file *file, unsigned int cmd, case EFI_RUNTIME_RESET_SYSTEM: return efi_runtime_reset_system(arg); + + case EFI_RUNTIME_GET_SUPPORTED_MASK: + return efi_runtime_get_supported_mask(arg); } return -ENOTTY; diff --git a/drivers/firmware/efi/test/efi_test.h b/drivers/firmware/efi/test/efi_test.h index f2446aa1c2e3..117349e57993 100644 --- a/drivers/firmware/efi/test/efi_test.h +++ b/drivers/firmware/efi/test/efi_test.h @@ -118,4 +118,7 @@ struct efi_resetsystem { #define EFI_RUNTIME_RESET_SYSTEM \ _IOW('p', 0x0B, struct efi_resetsystem) +#define EFI_RUNTIME_GET_SUPPORTED_MASK \ + _IOR('p', 0x0C, unsigned int) + #endif /* _DRIVERS_FIRMWARE_EFI_TEST_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 65d1b23d7e74..b9c11c2b2885 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1414,10 +1414,12 @@ out: pm_runtime_put_autosuspend(connector->dev->dev); } - drm_dp_set_subconnector_property(&amdgpu_connector->base, - ret, - amdgpu_dig_connector->dpcd, - amdgpu_dig_connector->downstream_ports); + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector->connector_type == DRM_MODE_CONNECTOR_eDP) + drm_dp_set_subconnector_property(&amdgpu_connector->base, + ret, + amdgpu_dig_connector->dpcd, + 
amdgpu_dig_connector->downstream_ports); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7d2f7a2240b8..1cb7d73f7317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5069,8 +5069,7 @@ out: * @pdev: pointer to PCI device * * Called when the error recovery driver tells us that its - * OK to resume normal operation. Use completion to allow - * halted scsi ops to resume. + * OK to resume normal operation. */ void amdgpu_pci_resume(struct pci_dev *pdev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c2ced5be6d7b..6e679db5e46f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -496,7 +496,8 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) break; } - if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) { + if (amdgpu_sriov_vf(adev) || + !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) { size = 0; } else { size = amdgpu_gmc_get_vbios_fb_size(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fc9bb94eaaf4..5f4805e4d04a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1647,7 +1647,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) } /* No CPG in Arcturus */ - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->gfx.num_gfx_rings) { r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); if (r) return r; @@ -2633,7 +2633,14 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); + u32 tmp; + + /* don't toggle interrupts that are only applicable + * to me0 pipe0 on AISCs that have me0 removed */ + if (!adev->gfx.num_gfx_rings) + 
return; + + tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); @@ -3822,7 +3829,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) gfx_v9_0_enable_gui_idle_interrupt(adev, false); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->gfx.num_gfx_rings) { /* legacy firmware loading */ r = gfx_v9_0_cp_gfx_load_microcode(adev); if (r) @@ -3838,7 +3845,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->gfx.num_gfx_rings) { r = gfx_v9_0_cp_gfx_resume(adev); if (r) return r; @@ -3848,7 +3855,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->gfx.num_gfx_rings) { ring = &adev->gfx.gfx_ring[0]; r = amdgpu_ring_test_helper(ring); if (r) @@ -3884,7 +3891,7 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { - if (adev->asic_type != CHIP_ARCTURUS) + if (adev->gfx.num_gfx_rings) gfx_v9_0_cp_gfx_enable(adev, enable); gfx_v9_0_cp_compute_enable(adev, enable); } @@ -4025,7 +4032,7 @@ static int gfx_v9_0_soft_reset(void *handle) /* stop the rlc */ adev->gfx.rlc.funcs->stop(adev); - if (adev->asic_type != CHIP_ARCTURUS) + if (adev->gfx.num_gfx_rings) /* Disable GFX parsing/prefetching */ gfx_v9_0_cp_gfx_enable(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e1531d97f486..e22268f9dba7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1577,13 +1577,10 @@ static int gmc_v9_0_hw_init(void *handle) gmc_v9_0_init_golden_registers(adev); if (adev->mode_info.num_crtc) { - if (adev->asic_type != 
CHIP_ARCTURUS) { - /* Lockout access through VGA aperture*/ - WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - - /* disable VGA render */ - WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - } + /* Lockout access through VGA aperture*/ + WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + /* disable VGA render */ + WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); } amdgpu_device_program_register_sequence(adev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 50922ff2927b..72c893fff61a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -422,7 +422,7 @@ static const struct kfd_device_info navi10_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -440,7 +440,7 @@ static const struct kfd_device_info navi12_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -458,7 +458,7 @@ static const struct kfd_device_info navi14_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -476,7 +476,7 @@ static const struct kfd_device_info sienna_cichlid_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 4, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -494,7 +494,7 @@ static const struct 
kfd_device_info navy_flounder_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -530,7 +530,7 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2c4dbdeec46a..519080e9a233 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2386,7 +2386,8 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); - drm_add_edid_modes(connector, aconnector->edid); + aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid); + drm_connector_list_update(connector); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, @@ -9367,7 +9368,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (ret) goto fail; - if (dm_old_crtc_state->dsc_force_changed && new_crtc_state) + if (dm_old_crtc_state->dsc_force_changed) new_crtc_state->mode_changed = true; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 357778556b06..26ed70e5538a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -165,7 +165,10 @@ static struct list_head *remove_irq_handler(struct amdgpu_device *adev, handler = list_entry(entry, struct amdgpu_dm_irq_handler_data, list); - if (ih == handler) { + if (handler == NULL) + continue; 
+ + if (ih == handler->handler) { /* Found our handler. Remove it from the list. */ list_del(&handler->list); handler_removed = true; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index d00b02553d62..01b1853b7750 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -75,15 +75,8 @@ int rn_get_active_display_cnt_wa( for (i = 0; i < dc->link_count; i++) { const struct dc_link *link = dc->links[i]; - /* - * Only notify active stream or virtual stream. - * Need to notify virtual stream to work around - * headless case. HPD does not fire when system is in - * S0i2. - */ /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */ - if (link->connector_signal == SIGNAL_TYPE_VIRTUAL || - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + if (link->link_enc->funcs->is_dig_enabled(link->link_enc)) display_count++; } @@ -234,12 +227,11 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); // always update dtos unless clock is lowered and not safe to lower - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - rn_update_clocks_update_dpp_dto( - clk_mgr, - context, - clk_mgr_base->clks.actual_dppclk_khz, - safe_to_lower); + rn_update_clocks_update_dpp_dto( + clk_mgr, + context, + clk_mgr_base->clks.actual_dppclk_khz, + safe_to_lower); } if (update_dispclk && @@ -738,32 +730,32 @@ static struct wm_table ddr4_wm_table_rn = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9.09, - .sr_enter_plus_exit_time_us = 10.14, + .sr_exit_time_us = 11.90, + .sr_enter_plus_exit_time_us = 12.80, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 11.12, - .sr_enter_plus_exit_time_us = 12.48, + 
.sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 11.12, - .sr_enter_plus_exit_time_us = 12.48, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 11.12, - .sr_enter_plus_exit_time_us = 12.48, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, .valid = true, }, } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 11a7b583d561..7deeec9d1c7c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -99,7 +99,7 @@ int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned /* Trigger the message transaction by writing the message ID */ REG_WRITE(MP1_SMN_C2PMSG_67, msg_id); - result = rn_smu_wait_for_response(clk_mgr, 10, 1000); + result = rn_smu_wait_for_response(clk_mgr, 10, 200000); ASSERT(result == VBIOSSMC_Result_OK || result == VBIOSSMC_Result_UnknownCmd); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index 9a8e66bba9c0..991b9c5beaa3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -74,15 +74,8 @@ int vg_get_active_display_cnt_wa( for (i = 0; i < dc->link_count; i++) { const struct dc_link *link = dc->links[i]; - /* - * Only notify active stream or virtual stream. - * Need to notify virtual stream to work around - * headless case. HPD does not fire when system is in - * S0i2. 
- */ /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */ - if (link->connector_signal == SIGNAL_TYPE_VIRTUAL || - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + if (link->link_enc->funcs->is_dig_enabled(link->link_enc)) display_count++; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7339d9855ec8..58eb0d69873a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2625,26 +2625,6 @@ static void commit_planes_for_stream(struct dc *dc, } } - if (update_type != UPDATE_TYPE_FAST) { - // If changing VTG FP2: wait until back in vactive to program FP2 - // Need to ensure that pipe unlock happens soon after to minimize race condition - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->top_pipe || pipe_ctx->stream != stream) - continue; - - if (!pipe_ctx->update_flags.bits.global_sync) - continue; - - pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); - pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); - - pipe_ctx->stream_res.tg->funcs->set_vtg_params( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); - } - } - if ((update_type != UPDATE_TYPE_FAST) && dc->hwss.interdependent_update_lock) dc->hwss.interdependent_update_lock(dc, context, false); else diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 6b11d4af54af..2fc12239b22c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3173,13 +3173,7 @@ static void get_active_converter_info( } /* DPCD 0x5 bit 0 = 1, it indicate it's branch device */ - if (ds_port.fields.PORT_TYPE == DOWNSTREAM_DP) { - link->dpcd_caps.is_branch_dev = false; - } - - else { - 
link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT; - } + link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT; switch (ds_port.fields.PORT_TYPE) { case DOWNSTREAM_VGA: diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 41679ad531c5..9e796dfeac20 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1241,6 +1241,22 @@ void hubp1_vtg_sel(struct hubp *hubp, uint32_t otg_inst) REG_UPDATE(DCHUBP_CNTL, HUBP_VTG_SEL, otg_inst); } +bool hubp1_in_blank(struct hubp *hubp) +{ + uint32_t in_blank; + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_GET(DCHUBP_CNTL, HUBP_IN_BLANK, &in_blank); + return in_blank ? true : false; +} + +void hubp1_soft_reset(struct hubp *hubp, bool reset) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_UPDATE(DCHUBP_CNTL, HUBP_DISABLE, reset ? 1 : 0); +} + void hubp1_init(struct hubp *hubp) { //do nothing @@ -1272,6 +1288,8 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .dmdata_set_attributes = NULL, .dmdata_load = NULL, + .hubp_soft_reset = hubp1_soft_reset, + .hubp_in_blank = hubp1_in_blank, }; /*****************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index 780af5b3c16f..a9a6ed7f4f99 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -260,6 +260,7 @@ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_IN_BLANK, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_BANKS, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\ @@ -455,6 +456,7 @@ type HUBP_VTG_SEL;\ type 
HUBP_UNDERFLOW_STATUS;\ type HUBP_UNDERFLOW_CLEAR;\ + type HUBP_IN_BLANK;\ type NUM_PIPES;\ type NUM_BANKS;\ type PIPE_INTERLEAVE;\ @@ -772,5 +774,7 @@ void hubp1_vready_workaround(struct hubp *hubp, void hubp1_init(struct hubp *hubp); void hubp1_read_state_common(struct hubp *hubp); +bool hubp1_in_blank(struct hubp *hubp); +void hubp1_soft_reset(struct hubp *hubp, bool reset); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 3fcd408e9103..100ce0e28fd5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -467,6 +467,17 @@ void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock) REG_SET(CUR[opp_id], 0, CUR_VUPDATE_LOCK_SET, lock ? 1 : 0); } +unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id) +{ + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + uint32_t val = 0; + + if (opp_id < MAX_OPP && REG(MUX[opp_id])) + REG_GET(MUX[opp_id], MPC_OUT_MUX, &val); + + return val; +} + static const struct mpc_funcs dcn10_mpc_funcs = { .read_mpcc_state = mpc1_read_mpcc_state, .insert_plane = mpc1_insert_plane, @@ -483,6 +494,7 @@ static const struct mpc_funcs dcn10_mpc_funcs = { .set_denorm_clamp = NULL, .set_output_csc = NULL, .set_output_gamma = NULL, + .get_mpc_out_mux = mpc1_get_mpc_out_mux, }; void dcn10_mpc_construct(struct dcn10_mpc *mpc10, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h index 66a4719c22a0..dbfffc6383dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h @@ -200,4 +200,5 @@ void mpc1_read_mpcc_state( void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock); +unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 
b7e44e53a342..0df0da2e6a4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1595,6 +1595,8 @@ static struct hubp_funcs dcn20_hubp_funcs = { .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp1_init, .validate_dml_output = hubp2_validate_dml_output, + .hubp_in_blank = hubp1_in_blank, + .hubp_soft_reset = hubp1_soft_reset, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 31a477194d3b..cb822df21b7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1586,7 +1586,10 @@ static void dcn20_program_pipe( && !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe) hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible); - if (pipe_ctx->update_flags.bits.global_sync) { + /* Only update TG on top pipe */ + if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe + && !pipe_ctx->prev_odm_pipe) { + pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, pipe_ctx->pipe_dlg_param.vready_offset, @@ -1594,8 +1597,11 @@ static void dcn20_program_pipe( pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->set_vtg_params( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); if (hws->funcs.setup_vupdate_interrupt) hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); @@ -2570,4 +2576,4 @@ void dcn20_set_disp_pattern_generator(const struct dc *dc, { pipe_ctx->stream_res.opp->funcs->opp_set_disp_pattern_generator(pipe_ctx->stream_res.opp, test_pattern, color_space, 
color_depth, solid_color, width, height, offset); -} \ No newline at end of file +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index 99cc095dc33c..6a99fdd55e8c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -556,6 +556,7 @@ const struct mpc_funcs dcn20_mpc_funcs = { .set_ocsc_default = mpc2_set_ocsc_default, .set_output_gamma = mpc2_set_output_gamma, .power_on_mpc_mem_pwr = mpc20_power_on_ogam_lut, + .get_mpc_out_mux = mpc1_get_mpc_out_mux, }; void dcn20_mpc_construct(struct dcn20_mpc *mpc20, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index ff36db5edf6c..e04ecf0fc0db 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1933,7 +1933,7 @@ bool dcn20_split_stream_for_odm( next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx]; else next_odm_pipe->stream_res.opp = next_odm_pipe->top_pipe->stream_res.opp; - if (next_odm_pipe->stream->timing.flags.DSC == 1) { + if (next_odm_pipe->stream->timing.flags.DSC == 1 && !next_odm_pipe->top_pipe) { dcn20_acquire_dsc(dc, res_ctx, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx); ASSERT(next_odm_pipe->stream_res.dsc); if (next_odm_pipe->stream_res.dsc == NULL) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index af462fe4260d..88ffa9ff1ed1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -509,6 +509,8 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_clear_underflow = hubp2_clear_underflow, .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp3_init, + .hubp_in_blank = hubp1_in_blank, + .hubp_soft_reset = hubp1_soft_reset, }; bool 
hubp3_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c index d7d053fc6e91..3e6f76096119 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c @@ -1428,6 +1428,7 @@ const struct mpc_funcs dcn30_mpc_funcs = { .program_3dlut = mpc3_program_3dlut, .release_rmu = mpcc3_release_rmu, .power_on_mpc_mem_pwr = mpc20_power_on_ogam_lut, + .get_mpc_out_mux = mpc1_get_mpc_out_mux, }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 315e3061c592..22f3f643ed1b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -188,6 +188,8 @@ struct hubp_funcs { void (*set_unbounded_requesting)( struct hubp *hubp, bool enable); + bool (*hubp_in_blank)(struct hubp *hubp); + void (*hubp_soft_reset)(struct hubp *hubp, bool reset); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 879f502ae530..75c77ad9cbfe 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -359,6 +359,10 @@ struct mpc_funcs { int (*release_rmu)(struct mpc *mpc, int mpcc_id); + unsigned int (*get_mpc_out_mux)( + struct mpc *mpc, + int opp_id); + }; #endif diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index f512bda96917..249a076d6f69 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -47,10 +47,10 @@ /* Firmware versioning. 
*/ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0xa18e25995 +#define DMUB_FW_VERSION_GIT_HASH 0xf51b86a #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 46 +#define DMUB_FW_VERSION_REVISION 47 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index f244b72e74e0..73ca49f05bd3 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -128,8 +128,12 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { - /* device count must be greater than or equal to tracked hdcp displays */ - return (get_device_count(hdcp) < get_active_display_count(hdcp)) ? + /* Some MST display may choose to report the internal panel as an HDCP RX. + * To update this condition with 1(because the immediate repeater's internal + * panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). + * Device count must be greater than or equal to tracked hdcp displays. + */ + return ((1 + get_device_count(hdcp)) < get_active_display_count(hdcp)) ? 
MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE : MOD_HDCP_STATUS_SUCCESS; } diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index 549c113abcf7..a0895a7efda2 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -207,8 +207,11 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { - /* device count must be greater than or equal to tracked hdcp displays */ - return (get_device_count(hdcp) < get_active_display_count(hdcp)) ? + /* Some MST display may choose to report the internal panel as an HDCP RX. */ + /* To update this condition with 1(because the immediate repeater's internal */ + /* panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). */ + /* Device count must be greater than or equal to tracked hdcp displays. */ + return ((1 + get_device_count(hdcp)) < get_active_display_count(hdcp)) ? 
MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE : MOD_HDCP_STATUS_SUCCESS; } diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index cc983f662157..4fd8bce95d84 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -82,22 +82,24 @@ struct abm_parameters { unsigned char deviation_gain; unsigned char min_knee; unsigned char max_knee; + unsigned short blRampReduction; + unsigned short blRampStart; }; static const struct abm_parameters abm_settings_config0[abm_defines_max_level] = { -// min_red max_red bright_pos dark_pos brightness_gain contrast deviation min_knee max_knee - {0xff, 0xbf, 0x20, 0x00, 0xff, 0x99, 0xb3, 0x40, 0xe0}, - {0xde, 0x85, 0x20, 0x00, 0xff, 0x90, 0xa8, 0x40, 0xdf}, - {0xb0, 0x50, 0x20, 0x00, 0xc0, 0x88, 0x78, 0x70, 0xa0}, - {0x82, 0x40, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, +// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blStart blRed + {0xff, 0xbf, 0x20, 0x00, 0xff, 0x99, 0xb3, 0x40, 0xe0, 0xCCCC, 0xCCCC}, + {0xde, 0x85, 0x20, 0x00, 0xff, 0x90, 0xa8, 0x40, 0xdf, 0xCCCC, 0xCCCC}, + {0xb0, 0x50, 0x20, 0x00, 0xc0, 0x88, 0x78, 0x70, 0xa0, 0xCCCC, 0xCCCC}, + {0x82, 0x40, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, }; static const struct abm_parameters abm_settings_config1[abm_defines_max_level] = { -// min_red max_red bright_pos dark_pos brightness_gain contrast deviation min_knee max_knee - {0xf0, 0xd9, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0xcd, 0xa5, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0x99, 0x65, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, +// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blStart blRed + {0xf0, 0xd9, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, + {0xcd, 0xa5, 0x20, 0x00, 0x00, 0xff, 0xb3, 
0x70, 0x70, 0xCCCC, 0xCCCC}, + {0x99, 0x65, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, + {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC}, }; static const struct abm_parameters * const abm_settings[] = { @@ -662,6 +664,7 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, { struct iram_table_v_2_2 ram_table; struct abm_config_table config; + unsigned int set = params.set; bool result = false; uint32_t i, j = 0; @@ -710,6 +713,18 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, config.max_knee[i] = ram_table.max_knee[i]; } + if (params.backlight_ramping_override) { + for (i = 0; i < NUM_AGGR_LEVEL; i++) { + config.blRampReduction[i] = params.backlight_ramping_reduction; + config.blRampStart[i] = params.backlight_ramping_start; + } + } else { + for (i = 0; i < NUM_AGGR_LEVEL; i++) { + config.blRampReduction[i] = abm_settings[set][i].blRampReduction; + config.blRampStart[i] = abm_settings[set][i].blRampStart; + } + } + config.min_abm_backlight = ram_table.min_abm_backlight; #if defined(CONFIG_DRM_AMD_DC_DCN) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index fa4728d88092..6f2eecce6baa 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -39,6 +39,7 @@ enum abm_defines { struct dmcu_iram_parameters { unsigned int *backlight_lut_array; unsigned int backlight_lut_array_size; + bool backlight_ramping_override; unsigned int backlight_ramping_reduction; unsigned int backlight_ramping_start; unsigned int min_abm_backlight; diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index e5aa0725147c..13de692a4213 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -30,7 +30,7 @@ #define SMU11_DRIVER_IF_VERSION_NV10 0x36 #define SMU11_DRIVER_IF_VERSION_NV12 0x36 #define 
SMU11_DRIVER_IF_VERSION_NV14 0x36 -#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3B +#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3D #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xC #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x02 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 9bccf2ad038c..8cb4fcee9a2c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -724,8 +724,13 @@ static int vangogh_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) static int vangogh_system_features_control(struct smu_context *smu, bool en) { - return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify, - en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL); + struct amdgpu_device *adev = smu->adev; + + if (adev->pm.fw_version >= 0x43f1700) + return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify, + en ? 
RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL); + else + return 0; } static const struct pptable_funcs vangogh_ppt_funcs = { diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 1f8195bad536..ca891ae14d36 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -152,7 +152,6 @@ static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev) ret = of_reserved_mem_device_init(dev); if (ret && ret != -ENODEV) return ret; - ret = 0; for_each_available_child_of_node(np, child) { if (of_node_name_eq(child, "pipeline")) { diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index 6b99df696384..034ee08482e0 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -81,10 +81,10 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_commit_modeset_enables(dev, old_state); - drm_atomic_helper_wait_for_flip_done(dev, old_state); - drm_atomic_helper_commit_hw_done(old_state); + drm_atomic_helper_wait_for_flip_done(dev, old_state); + drm_atomic_helper_cleanup_planes(dev, old_state); } diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c index 452e505a1fd3..719a79728e24 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c @@ -137,9 +137,10 @@ komeda_pipeline_get_first_component(struct komeda_pipeline *pipe, u32 comp_mask) { struct komeda_component *c = NULL; + unsigned long comp_mask_local = (unsigned long)comp_mask; int id; - id = find_first_bit((unsigned long *)&comp_mask, 32); + id = find_first_bit(&comp_mask_local, 32); if (id < 32) c = komeda_pipeline_get_component(pipe, id); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c 
b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c index 8f32ae7c25d0..5c085116de3f 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c @@ -704,10 +704,10 @@ komeda_compiz_set_input(struct komeda_compiz *compiz, cin->layer_alpha = dflow->layer_alpha; old_st = komeda_component_get_old_state(&compiz->base, drm_st); - WARN_ON(!old_st); /* compare with old to check if this input has been changed */ - if (memcmp(&(to_compiz_st(old_st)->cins[idx]), cin, sizeof(*cin))) + if (WARN_ON(!old_st) || + memcmp(&(to_compiz_st(old_st)->cins[idx]), cin, sizeof(*cin))) c_st->changed_active_inputs |= BIT(idx); komeda_component_add_input(c_st, &dflow->input, idx); diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index ad5cc13037ae..1c939f9c9bc9 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -297,13 +297,9 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv) */ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) { - struct irq_desc *desc; - if (!HAS_LPE_AUDIO(dev_priv)) return; - desc = irq_to_desc(dev_priv->lpe_audio.irq); - lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c80eeac53952..6cdb052e3850 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -60,6 +60,24 @@ * and related files, but that will be described in separate chapters. */ +/* + * Interrupt statistic for PMU. Increments the counter only if the + * interrupt originated from the GPU so interrupts from a device which + * shares the interrupt line are not accounted.
+ */ +static inline void pmu_irq_stats(struct drm_i915_private *i915, + irqreturn_t res) +{ + if (unlikely(res != IRQ_HANDLED)) + return; + + /* + * A clever compiler translates that into INC. A not so clever one + * should at least prevent store tearing. + */ + WRITE_ONCE(i915->pmu.irq_count, i915->pmu.irq_count + 1); +} + typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val); typedef u32 (*hotplug_enables_func)(struct drm_i915_private *i915, enum hpd_pin pin); @@ -1668,6 +1686,8 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) valleyview_pipestat_irq_handler(dev_priv, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, ret); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; @@ -1745,6 +1765,8 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) valleyview_pipestat_irq_handler(dev_priv, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, ret); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; @@ -2155,6 +2177,8 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) if (sde_ier) raw_reg_write(regs, SDEIER, sde_ier); + pmu_irq_stats(i915, ret); + /* IRQs are synced during runtime_suspend, we don't require a wakeref */ enable_rpm_wakeref_asserts(&i915->runtime_pm); @@ -2541,6 +2565,8 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) gen8_master_intr_enable(regs); + pmu_irq_stats(dev_priv, IRQ_HANDLED); + return IRQ_HANDLED; } @@ -2636,6 +2662,8 @@ __gen11_irq_handler(struct drm_i915_private * const i915, gen11_gu_misc_irq_handler(gt, gu_misc_iir); + pmu_irq_stats(i915, IRQ_HANDLED); + return IRQ_HANDLED; } @@ -3934,6 +3962,8 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, ret); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; @@ -4043,6 +4073,8 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) i915_pipestat_irq_handler(dev_priv, iir, pipe_stats); } while (0); 
+ pmu_irq_stats(dev_priv, ret); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; @@ -4189,6 +4221,8 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) i965_pipestat_irq_handler(dev_priv, iir, pipe_stats); } while (0); + pmu_irq_stats(dev_priv, IRQ_HANDLED); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index cd786ad12be7..d76685ce0399 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -4,7 +4,6 @@ * Copyright © 2017-2018 Intel Corporation */ -#include #include #include "gt/intel_engine.h" @@ -424,22 +423,6 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) return HRTIMER_RESTART; } -static u64 count_interrupts(struct drm_i915_private *i915) -{ - /* open-coded kstat_irqs() */ - struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq); - u64 sum = 0; - int cpu; - - if (!desc || !desc->kstat_irqs) - return 0; - - for_each_possible_cpu(cpu) - sum += *per_cpu_ptr(desc->kstat_irqs, cpu); - - return sum; -} - static void i915_pmu_event_destroy(struct perf_event *event) { struct drm_i915_private *i915 = @@ -590,7 +573,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event) USEC_PER_SEC /* to MHz */); break; case I915_PMU_INTERRUPTS: - val = count_interrupts(i915); + val = READ_ONCE(pmu->irq_count); break; case I915_PMU_RC6_RESIDENCY: val = get_rc6(&i915->gt); diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index a24885ab415c..8405d6da5b9a 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -111,6 +111,14 @@ struct i915_pmu { * @sleep_last: Last time GT parked for RC6 estimation. */ ktime_t sleep_last; + /** + * @irq_count: Number of interrupts + * + * Intentionally unsigned long to avoid atomics or heuristics on 32bit. + * 4e9 interrupts are a lot and postprocessing can really deal with an + * occasional wraparound easily. 
It's 32bit after all. + */ + unsigned long irq_count; /** * @events_attr_group: Device events attribute group. */ diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 5455b2044759..7b2f60616750 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -239,21 +239,6 @@ static struct page *ttm_pool_type_take(struct ttm_pool_type *pt) return p; } -/* Count the number of pages available in a pool_type */ -static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) -{ - unsigned int count = 0; - struct page *p; - - spin_lock(&pt->lock); - /* Only used for debugfs, the overhead doesn't matter */ - list_for_each_entry(p, &pt->pages, lru) - ++count; - spin_unlock(&pt->lock); - - return count; -} - /* Initialize and add a pool type to the global shrinker list */ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool, enum ttm_caching caching, unsigned int order) @@ -543,6 +528,20 @@ void ttm_pool_fini(struct ttm_pool *pool) EXPORT_SYMBOL(ttm_pool_fini); #ifdef CONFIG_DEBUG_FS +/* Count the number of pages available in a pool_type */ +static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) +{ + unsigned int count = 0; + struct page *p; + + spin_lock(&pt->lock); + /* Only used for debugfs, the overhead doesn't matter */ + list_for_each_entry(p, &pt->pages, lru) + ++count; + spin_unlock(&pt->lock); + + return count; +} /* Dump information about the different pool types */ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 560865f65dc4..67f86c405a26 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -157,12 +157,6 @@ void rtrs_clt_put_permit(struct rtrs_clt *clt, struct rtrs_permit *permit) } EXPORT_SYMBOL(rtrs_clt_put_permit); -void *rtrs_permit_to_pdu(struct rtrs_permit *permit) -{ - return permit + 1; -} 
-EXPORT_SYMBOL(rtrs_permit_to_pdu); - /** * rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit * @sess: client session pointer diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h index 9af750f4d783..8738e90e715a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.h +++ b/drivers/infiniband/ulp/rtrs/rtrs.h @@ -63,13 +63,6 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, void rtrs_clt_close(struct rtrs_clt *sess); -/** - * rtrs_permit_to_pdu() - converts rtrs_permit to opaque pdu pointer - * @permit: RTRS permit pointer, it associates the memory allocation for future - * RDMA operation. - */ -void *rtrs_permit_to_pdu(struct rtrs_permit *permit); - enum { RTRS_PERMIT_NOWAIT = 0, RTRS_PERMIT_WAIT = 1, diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 0e06d721cd8e..a4752ac410dc 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2535,8 +2535,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); - if (!IS_ERR(bdev)) - bdput(bdev); if (attr == &ksysfs_register_quiet) goto done; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 554e3afc9b68..00a520c03f41 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -404,7 +404,7 @@ STORE(__cached_dev) if (!env) return -ENOMEM; add_uevent_var(env, "DRIVER=bcache"); - add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid), + add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid); add_uevent_var(env, "CACHED_LABEL=%s", buf); kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index 6d1bf7c3ca3b..e43dea89b094 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1513,26 +1513,14 @@ static int ab8500_interrupts_show(struct seq_file *s, void *p) { int line; - seq_puts(s, "name: number: number 
of: wake:\n"); + seq_puts(s, "name: number: irq: number of: wake:\n"); for (line = 0; line < num_interrupt_lines; line++) { - struct irq_desc *desc = irq_to_desc(line + irq_first); - - seq_printf(s, "%3i: %6i %4i", + seq_printf(s, "%3i: %4i %6i %4i\n", line, + line + irq_first, num_interrupts[line], num_wake_interrupts[line]); - - if (desc && desc->name) - seq_printf(s, "-%-8s", desc->name); - if (desc && desc->action) { - struct irqaction *action = desc->action; - - seq_printf(s, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(s, ", %s", action->name); - } - seq_putc(s, '\n'); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 74d466796b7c..d5fc72b1a36f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -90,7 +90,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx) { struct mlx4_en_dev *mdev = priv->mdev; - int err = 0; + int irq, err = 0; int timestamp_en = 0; bool assigned_eq = false; @@ -116,10 +116,8 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, assigned_eq = true; } - - cq->irq_desc = - irq_to_desc(mlx4_eq_get_irq(mdev->dev, - cq->vector)); + irq = mlx4_eq_get_irq(mdev->dev, cq->vector); + cq->aff_mask = irq_get_effective_affinity_mask(irq); } else { /* For TX we use the same irq per ring we assigned for the RX */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 7954c1daf2b6..c1c9118a66c9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -958,18 +958,14 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... 
*/ if (done == budget || !clean_complete) { - const struct cpumask *aff; - struct irq_data *idata; int cpu_curr; /* in case we got here because of !clean_complete */ done = budget; cpu_curr = smp_processor_id(); - idata = irq_desc_get_irq_data(cq->irq_desc); - aff = irq_data_get_affinity_mask(idata); - if (likely(cpumask_test_cpu(cpu_curr, aff))) + if (likely(cpumask_test_cpu(cpu_curr, cq->aff_mask))) return budget; /* Current cpu is not according to smp_irq_affinity - diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 17f2b1919378..e8ed23190de0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -47,6 +47,7 @@ #endif #include #include +#include #include #include @@ -365,7 +366,7 @@ struct mlx4_en_cq { struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e - struct irq_desc *irq_desc; + const struct cpumask *aff_mask; }; struct mlx4_en_port_profile { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a1a81cfeb607..055baf3b6cb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -684,7 +684,7 @@ struct mlx5e_channel { spinlock_t async_icosq_lock; /* data path - accessed per napi poll */ - struct irq_desc *irq_desc; + const struct cpumask *aff_mask; struct mlx5e_ch_stats *stats; /* control */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 351118985a57..2a2bac30daaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -479,7 +479,6 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = params->num_tc; c->stats = &priv->port_ptp_stats.ch; - c->irq_desc = irq_to_desc(irq); c->lag_port = lag_port; 
netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index 28aa5ae118f4..90c98ea63b7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -28,7 +28,6 @@ struct mlx5e_port_ptp { u8 lag_port; /* data path - accessed per napi poll */ - struct irq_desc *irq_desc; struct mlx5e_ch_stats *stats; /* control */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 03831650f655..7a79d330c075 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1987,7 +1987,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); + c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 1ec3d62f026d..a3cfe06d5116 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -40,12 +40,8 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { int current_cpu = smp_processor_id(); - const struct cpumask *aff; - struct irq_data *idata; - idata = irq_desc_get_irq_data(c->irq_desc); - aff = irq_data_get_affinity_mask(idata); - return cpumask_test_cpu(current_cpu, aff); + return cpumask_test_cpu(current_cpu, c->aff_mask); } static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 613cafec610f..882a64666a86 100644 --- a/drivers/net/virtio_net.c +++ 
b/drivers/net/virtio_net.c @@ -3075,6 +3075,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "device MTU appears to have changed it is now %d < %d", mtu, dev->min_mtu); + err = -EINVAL; goto free; } diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c index 0a5e884a920c..3f05cfbc73af 100644 --- a/drivers/ntb/msi.c +++ b/drivers/ntb/msi.c @@ -282,15 +282,13 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, struct ntb_msi_desc *msi_desc) { struct msi_desc *entry; - struct irq_desc *desc; int ret; if (!ntb->msi) return -EINVAL; for_each_pci_msi_entry(entry, ntb->pdev) { - desc = irq_to_desc(entry->irq); - if (desc->action) + if (irq_has_action(entry->irq)) continue; ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler, diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 2e258bee7db2..aa53e0b769bd 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -7,7 +7,6 @@ #ifndef _LINUX_BTT_H #define _LINUX_BTT_H -#include #include #define BTT_SIG_LEN 16 @@ -197,6 +196,8 @@ struct arena_info { int log_index[2]; }; +struct badblocks; + /** * struct btt - handle for a BTT instance * @btt_disk: Pointer to the gendisk for BTT device diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 5a7c80053c62..030dbde6b088 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -4,6 +4,7 @@ */ #include #include +#include #include "nd-core.h" #include "pmem.h" #include "pfn.h" diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index c21ba0602029..7de592d7eff4 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -3,7 +3,6 @@ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. 
*/ #include -#include #include #include #include diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 47a4828b8b31..9251441fd8a3 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -980,6 +980,15 @@ static int __blk_label_update(struct nd_region *nd_region, } } + /* release slots associated with any invalidated UUIDs */ + mutex_lock(&nd_mapping->lock); + list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) + if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)) { + reap_victim(nd_mapping, label_ent); + list_move(&label_ent->list, &list); + } + mutex_unlock(&nd_mapping->lock); + /* * Find the resource associated with the first label in the set * per the v1.2 namespace specification. @@ -999,8 +1008,10 @@ static int __blk_label_update(struct nd_region *nd_region, if (is_old_resource(res, old_res_list, old_num_resources)) continue; /* carry-over */ slot = nd_label_alloc_slot(ndd); - if (slot == UINT_MAX) + if (slot == UINT_MAX) { + rc = -ENXIO; goto abort; + } dev_dbg(ndd->dev, "allocated: %d\n", slot); nd_label = to_label(ndd, slot); diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 282071fc167f..df17cae2e4e6 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -398,12 +398,8 @@ int dw_pcie_host_init(struct pcie_port *pp) pp); ret = dma_set_mask(pci->dev, DMA_BIT_MASK(32)); - if (!ret) { - dev_warn(pci->dev, - "Failed to set DMA mask to 32-bit. " - "Devices with only 32-bit MSI support" - " may not work properly\n"); - } + if (ret) + dev_warn(pci->dev, "Failed to set DMA mask to 32-bit. 
Devices with only 32-bit MSI support may not work properly\n"); pp->msi_data = dma_map_single_attrs(pci->dev, &pp->msi_msg, sizeof(pp->msi_msg), diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index 5597b2a49598..6fa216e52d14 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -853,12 +853,14 @@ static void config_gen3_gen4_eq_presets(struct tegra_pcie_dw *pcie) dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val); } -static void tegra_pcie_prepare_host(struct pcie_port *pp) +static int tegra_pcie_dw_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); u32 val; + pp->bridge->ops = &tegra_pci_ops; + if (!pcie->pcie_cap_base) pcie->pcie_cap_base = dw_pcie_find_capability(&pcie->pci, PCI_CAP_ID_EXP); @@ -907,10 +909,24 @@ static void tegra_pcie_prepare_host(struct pcie_port *pp) dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val); } - dw_pcie_setup_rc(pp); - clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ); + return 0; +} + +static int tegra_pcie_dw_start_link(struct dw_pcie *pci) +{ + u32 val, offset, speed, tmp; + struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); + struct pcie_port *pp = &pci->pp; + bool retry = true; + + if (pcie->mode == DW_PCIE_EP_TYPE) { + enable_irq(pcie->pex_rst_irq); + return 0; + } + +retry_link: /* Assert RST */ val = appl_readl(pcie, APPL_PINMUX); val &= ~APPL_PINMUX_PEX_RST; @@ -929,19 +945,10 @@ static void tegra_pcie_prepare_host(struct pcie_port *pp) appl_writel(pcie, val, APPL_PINMUX); msleep(100); -} - -static int tegra_pcie_dw_host_init(struct pcie_port *pp) -{ - struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); - u32 val, tmp, offset, speed; - - pp->bridge->ops = &tegra_pci_ops; - - tegra_pcie_prepare_host(pp); if (dw_pcie_wait_for_link(pci)) { + if (!retry) + return 0; /* * There are some endpoints 
which can't get the link up if * root port has Data Link Feature (DLF) enabled. @@ -975,10 +982,11 @@ static int tegra_pcie_dw_host_init(struct pcie_port *pp) val &= ~PCI_DLF_EXCHANGE_ENABLE; dw_pcie_writel_dbi(pci, offset, val); - tegra_pcie_prepare_host(pp); + tegra_pcie_dw_host_init(pp); + dw_pcie_setup_rc(pp); - if (dw_pcie_wait_for_link(pci)) - return 0; + retry = false; + goto retry_link; } speed = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA) & @@ -998,15 +1006,6 @@ static int tegra_pcie_dw_link_up(struct dw_pcie *pci) return !!(val & PCI_EXP_LNKSTA_DLLLA); } -static int tegra_pcie_dw_start_link(struct dw_pcie *pci) -{ - struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); - - enable_irq(pcie->pex_rst_irq); - - return 0; -} - static void tegra_pcie_dw_stop_link(struct dw_pcie *pci) { struct tegra_pcie_dw *pcie = to_tegra_pcie(pci); @@ -2215,6 +2214,10 @@ static int tegra_pcie_dw_resume_noirq(struct device *dev) goto fail_host_init; } + ret = tegra_pcie_dw_start_link(&pcie->pci); + if (ret < 0) + goto fail_host_init; + /* Restore MSI interrupt vector */ dw_pcie_writel_dbi(&pcie->pci, PORT_LOGIC_MSI_CTRL_INT_0_EN, pcie->msi_ctrl_int); diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c index a2632d02ce8f..c637de3a389b 100644 --- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c +++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c @@ -306,13 +306,11 @@ int mobiveil_host_init(struct mobiveil_pcie *pcie, bool reinit) static void mobiveil_mask_intx_irq(struct irq_data *data) { - struct irq_desc *desc = irq_to_desc(data->irq); - struct mobiveil_pcie *pcie; + struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data); struct mobiveil_root_port *rp; unsigned long flags; u32 mask, shifted_val; - pcie = irq_desc_get_chip_data(desc); rp = &pcie->rp; mask = 1 << ((data->hwirq + PAB_INTX_START) - 1); raw_spin_lock_irqsave(&rp->intx_mask_lock, flags); @@ -324,13 +322,11 @@ static 
void mobiveil_mask_intx_irq(struct irq_data *data) static void mobiveil_unmask_intx_irq(struct irq_data *data) { - struct irq_desc *desc = irq_to_desc(data->irq); - struct mobiveil_pcie *pcie; + struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data); struct mobiveil_root_port *rp; unsigned long flags; u32 shifted_val, mask; - pcie = irq_desc_get_chip_data(desc); rp = &pcie->rp; mask = 1 << ((data->hwirq + PAB_INTX_START) - 1); raw_spin_lock_irqsave(&rp->intx_mask_lock, flags); diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 7f29c2fdcd51..07e36661bbc2 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -374,13 +374,11 @@ static void nwl_pcie_msi_handler_low(struct irq_desc *desc) static void nwl_mask_leg_irq(struct irq_data *data) { - struct irq_desc *desc = irq_to_desc(data->irq); - struct nwl_pcie *pcie; + struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data); unsigned long flags; u32 mask; u32 val; - pcie = irq_desc_get_chip_data(desc); mask = 1 << (data->hwirq - 1); raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags); val = nwl_bridge_readl(pcie, MSGF_LEG_MASK); @@ -390,13 +388,11 @@ static void nwl_mask_leg_irq(struct irq_data *data) static void nwl_unmask_leg_irq(struct irq_data *data) { - struct irq_desc *desc = irq_to_desc(data->irq); - struct nwl_pcie *pcie; + struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data); unsigned long flags; u32 mask; u32 val; - pcie = irq_desc_get_chip_data(desc); mask = 1 << (data->hwirq - 1); raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags); val = nwl_bridge_readl(pcie, MSGF_LEG_MASK); diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index 657e35a75d84..d4ea10803fd9 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -948,8 +948,8 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s, (mode < 0) ? 
"unknown" : modes[mode]); } else { int irq = chip->to_irq(chip, offset); - struct irq_desc *desc = irq_to_desc(irq); const int pullidx = pull ? 1 : 0; + bool wake; int val; static const char * const pulls[] = { "none ", @@ -969,8 +969,9 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s, * This races with request_irq(), set_irq_type(), * and set_irq_wake() ... but those are "rare". */ - if (irq > 0 && desc && desc->action) { + if (irq > 0 && irq_has_action(irq)) { char *trigger; + bool wake; if (nmk_chip->edge_rising & BIT(offset)) trigger = "edge-rising"; @@ -979,10 +980,10 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s, else trigger = "edge-undefined"; + wake = !!(nmk_chip->real_wake & BIT(offset)); + seq_printf(s, " irq-%d %s%s", - irq, trigger, - irqd_is_wakeup_set(&desc->irq_data) - ? " wakeup" : ""); + irq, trigger, wake ? " wakeup" : ""); } } clk_disable(nmk_chip->clk); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 0ecee8b8773d..7c92a6e22d75 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -742,12 +742,16 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, * Sensor events need to be parsed by the sensor sub-device. * Defer them, and don't report the wakeup here. */ - if (event_type == EC_MKBP_EVENT_SENSOR_FIFO) - *wake_event = false; - /* Masked host-events should not count as wake events. */ - else if (host_event && - !(host_event & ec_dev->host_event_wake_mask)) + if (event_type == EC_MKBP_EVENT_SENSOR_FIFO) { *wake_event = false; + } else if (host_event) { + /* rtc_update_irq() already handles wakeup events. */ + if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_RTC)) + *wake_event = false; + /* Masked host-events should not count as wake events. 
*/ + if (!(host_event & ec_dev->host_event_wake_mask)) + *wake_event = false; + } } return ret; diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index 8111ed1fc574..c43868615790 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +32,12 @@ enum { CROS_EC_ALTMODE_MAX, }; +/* Container for altmode pointer nodes. */ +struct cros_typec_altmode_node { + struct typec_altmode *amode; + struct list_head list; +}; + /* Per port data. */ struct cros_typec_port { struct typec_port *port; @@ -48,6 +56,11 @@ struct cros_typec_port { /* Port alt modes. */ struct typec_altmode p_altmode[CROS_EC_ALTMODE_MAX]; + + /* Flag indicating that PD discovery data parsing is completed. */ + bool disc_done; + struct ec_response_typec_discovery *sop_disc; + struct list_head partner_mode_list; }; /* Platform-specific data for the Chrome OS EC Type C controller. 
*/ @@ -60,6 +73,7 @@ struct cros_typec_data { struct cros_typec_port *ports[EC_USB_PD_MAX_PORTS]; struct notifier_block nb; struct work_struct port_work; + bool typec_cmd_supported; }; static int cros_typec_parse_port_props(struct typec_capability *cap, @@ -166,11 +180,25 @@ static int cros_typec_add_partner(struct cros_typec_data *typec, int port_num, return ret; } +static void cros_typec_unregister_altmodes(struct cros_typec_data *typec, int port_num) +{ + struct cros_typec_port *port = typec->ports[port_num]; + struct cros_typec_altmode_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &port->partner_mode_list, list) { + list_del(&node->list); + typec_unregister_altmode(node->amode); + devm_kfree(typec->dev, node); + } +} + static void cros_typec_remove_partner(struct cros_typec_data *typec, int port_num) { struct cros_typec_port *port = typec->ports[port_num]; + cros_typec_unregister_altmodes(typec, port_num); + port->state.alt = NULL; port->state.mode = TYPEC_STATE_USB; port->state.data = NULL; @@ -181,6 +209,8 @@ static void cros_typec_remove_partner(struct cros_typec_data *typec, typec_unregister_partner(port->partner); port->partner = NULL; + memset(&port->p_identity, 0, sizeof(port->p_identity)); + port->disc_done = false; } static void cros_unregister_ports(struct cros_typec_data *typec) @@ -190,7 +220,10 @@ static void cros_unregister_ports(struct cros_typec_data *typec) for (i = 0; i < typec->num_ports; i++) { if (!typec->ports[i]) continue; - cros_typec_remove_partner(typec, i); + + if (typec->ports[i]->partner) + cros_typec_remove_partner(typec, i); + usb_role_switch_put(typec->ports[i]->role_sw); typec_switch_put(typec->ports[i]->ori_sw); typec_mux_put(typec->ports[i]->mux); @@ -289,6 +322,14 @@ static int cros_typec_init_ports(struct cros_typec_data *typec) port_num); cros_typec_register_port_altmodes(typec, port_num); + + cros_port->sop_disc = devm_kzalloc(dev, EC_PROTO2_MAX_RESPONSE_SIZE, GFP_KERNEL); + if (!cros_port->sop_disc) { + ret = 
-ENOMEM; + goto unregister_ports; + } + + INIT_LIST_HEAD(&cros_port->partner_mode_list); } return 0; @@ -329,74 +370,6 @@ static int cros_typec_ec_command(struct cros_typec_data *typec, return ret; } -static void cros_typec_set_port_params_v0(struct cros_typec_data *typec, - int port_num, struct ec_response_usb_pd_control *resp) -{ - struct typec_port *port = typec->ports[port_num]->port; - enum typec_orientation polarity; - - if (!resp->enabled) - polarity = TYPEC_ORIENTATION_NONE; - else if (!resp->polarity) - polarity = TYPEC_ORIENTATION_NORMAL; - else - polarity = TYPEC_ORIENTATION_REVERSE; - - typec_set_pwr_role(port, resp->role ? TYPEC_SOURCE : TYPEC_SINK); - typec_set_orientation(port, polarity); -} - -static void cros_typec_set_port_params_v1(struct cros_typec_data *typec, - int port_num, struct ec_response_usb_pd_control_v1 *resp) -{ - struct typec_port *port = typec->ports[port_num]->port; - enum typec_orientation polarity; - bool pd_en; - int ret; - - if (!(resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED)) - polarity = TYPEC_ORIENTATION_NONE; - else if (!resp->polarity) - polarity = TYPEC_ORIENTATION_NORMAL; - else - polarity = TYPEC_ORIENTATION_REVERSE; - typec_set_orientation(port, polarity); - typec_set_data_role(port, resp->role & PD_CTRL_RESP_ROLE_DATA ? - TYPEC_HOST : TYPEC_DEVICE); - typec_set_pwr_role(port, resp->role & PD_CTRL_RESP_ROLE_POWER ? - TYPEC_SOURCE : TYPEC_SINK); - typec_set_vconn_role(port, resp->role & PD_CTRL_RESP_ROLE_VCONN ? - TYPEC_SOURCE : TYPEC_SINK); - - /* Register/remove partners when a connect/disconnect occurs. 
*/ - if (resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED) { - if (typec->ports[port_num]->partner) - return; - - pd_en = resp->enabled & PD_CTRL_RESP_ENABLED_PD_CAPABLE; - ret = cros_typec_add_partner(typec, port_num, pd_en); - if (ret) - dev_warn(typec->dev, - "Failed to register partner on port: %d\n", - port_num); - } else { - if (!typec->ports[port_num]->partner) - return; - cros_typec_remove_partner(typec, port_num); - } -} - -static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num, - struct ec_response_usb_pd_mux_info *resp) -{ - struct ec_params_usb_pd_mux_info req = { - .port = port_num, - }; - - return cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_MUX_INFO, &req, - sizeof(req), resp, sizeof(*resp)); -} - static int cros_typec_usb_safe_state(struct cros_typec_port *port) { port->state.mode = TYPEC_STATE_SAFE; @@ -563,15 +536,210 @@ static int cros_typec_configure_mux(struct cros_typec_data *typec, int port_num, port->state.mode = TYPEC_STATE_USB; ret = typec_mux_set(port->mux, &port->state); } else { - dev_info(typec->dev, - "Unsupported mode requested, mux flags: %x\n", - mux_flags); - ret = -ENOTSUPP; + dev_dbg(typec->dev, + "Unrecognized mode requested, mux flags: %x\n", + mux_flags); } return ret; } +static void cros_typec_set_port_params_v0(struct cros_typec_data *typec, + int port_num, struct ec_response_usb_pd_control *resp) +{ + struct typec_port *port = typec->ports[port_num]->port; + enum typec_orientation polarity; + + if (!resp->enabled) + polarity = TYPEC_ORIENTATION_NONE; + else if (!resp->polarity) + polarity = TYPEC_ORIENTATION_NORMAL; + else + polarity = TYPEC_ORIENTATION_REVERSE; + + typec_set_pwr_role(port, resp->role ? 
TYPEC_SOURCE : TYPEC_SINK); + typec_set_orientation(port, polarity); +} + +static void cros_typec_set_port_params_v1(struct cros_typec_data *typec, + int port_num, struct ec_response_usb_pd_control_v1 *resp) +{ + struct typec_port *port = typec->ports[port_num]->port; + enum typec_orientation polarity; + bool pd_en; + int ret; + + if (!(resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED)) + polarity = TYPEC_ORIENTATION_NONE; + else if (!resp->polarity) + polarity = TYPEC_ORIENTATION_NORMAL; + else + polarity = TYPEC_ORIENTATION_REVERSE; + typec_set_orientation(port, polarity); + typec_set_data_role(port, resp->role & PD_CTRL_RESP_ROLE_DATA ? + TYPEC_HOST : TYPEC_DEVICE); + typec_set_pwr_role(port, resp->role & PD_CTRL_RESP_ROLE_POWER ? + TYPEC_SOURCE : TYPEC_SINK); + typec_set_vconn_role(port, resp->role & PD_CTRL_RESP_ROLE_VCONN ? + TYPEC_SOURCE : TYPEC_SINK); + + /* Register/remove partners when a connect/disconnect occurs. */ + if (resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED) { + if (typec->ports[port_num]->partner) + return; + + pd_en = resp->enabled & PD_CTRL_RESP_ENABLED_PD_CAPABLE; + ret = cros_typec_add_partner(typec, port_num, pd_en); + if (ret) + dev_warn(typec->dev, + "Failed to register partner on port: %d\n", + port_num); + } else { + if (!typec->ports[port_num]->partner) + return; + cros_typec_remove_partner(typec, port_num); + } +} + +static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num, + struct ec_response_usb_pd_mux_info *resp) +{ + struct ec_params_usb_pd_mux_info req = { + .port = port_num, + }; + + return cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_MUX_INFO, &req, + sizeof(req), resp, sizeof(*resp)); +} + +static int cros_typec_register_altmodes(struct cros_typec_data *typec, int port_num) +{ + struct cros_typec_port *port = typec->ports[port_num]; + struct ec_response_typec_discovery *sop_disc = port->sop_disc; + struct cros_typec_altmode_node *node; + struct typec_altmode_desc desc; + struct typec_altmode *amode; + 
int ret = 0; + int i, j; + + for (i = 0; i < sop_disc->svid_count; i++) { + for (j = 0; j < sop_disc->svids[i].mode_count; j++) { + memset(&desc, 0, sizeof(desc)); + desc.svid = sop_disc->svids[i].svid; + desc.mode = j; + desc.vdo = sop_disc->svids[i].mode_vdo[j]; + + amode = typec_partner_register_altmode(port->partner, &desc); + if (IS_ERR(amode)) { + ret = PTR_ERR(amode); + goto err_cleanup; + } + + /* If no memory is available we should unregister and exit. */ + node = devm_kzalloc(typec->dev, sizeof(*node), GFP_KERNEL); + if (!node) { + ret = -ENOMEM; + typec_unregister_altmode(amode); + goto err_cleanup; + } + + node->amode = amode; + list_add_tail(&node->list, &port->partner_mode_list); + } + } + + return 0; + +err_cleanup: + cros_typec_unregister_altmodes(typec, port_num); + return ret; +} + +static int cros_typec_handle_sop_disc(struct cros_typec_data *typec, int port_num) +{ + struct cros_typec_port *port = typec->ports[port_num]; + struct ec_response_typec_discovery *sop_disc = port->sop_disc; + struct ec_params_typec_discovery req = { + .port = port_num, + .partner_type = TYPEC_PARTNER_SOP, + }; + int ret = 0; + int i; + + if (!port->partner) { + dev_err(typec->dev, + "SOP Discovery received without partner registered, port: %d\n", + port_num); + ret = -EINVAL; + goto disc_exit; + } + + memset(sop_disc, 0, EC_PROTO2_MAX_RESPONSE_SIZE); + ret = cros_typec_ec_command(typec, 0, EC_CMD_TYPEC_DISCOVERY, &req, sizeof(req), + sop_disc, EC_PROTO2_MAX_RESPONSE_SIZE); + if (ret < 0) { + dev_err(typec->dev, "Failed to get SOP discovery data for port: %d\n", port_num); + goto disc_exit; + } + + /* First, update the PD identity VDOs for the partner. 
*/ + if (sop_disc->identity_count > 0) + port->p_identity.id_header = sop_disc->discovery_vdo[0]; + if (sop_disc->identity_count > 1) + port->p_identity.cert_stat = sop_disc->discovery_vdo[1]; + if (sop_disc->identity_count > 2) + port->p_identity.product = sop_disc->discovery_vdo[2]; + + /* Copy the remaining identity VDOs till a maximum of 6. */ + for (i = 3; i < sop_disc->identity_count && i < VDO_MAX_OBJECTS; i++) + port->p_identity.vdo[i - 3] = sop_disc->discovery_vdo[i]; + + ret = typec_partner_set_identity(port->partner); + if (ret < 0) { + dev_err(typec->dev, "Failed to update partner PD identity, port: %d\n", port_num); + goto disc_exit; + } + + ret = cros_typec_register_altmodes(typec, port_num); + if (ret < 0) { + dev_err(typec->dev, "Failed to register partner altmodes, port: %d\n", port_num); + goto disc_exit; + } + +disc_exit: + return ret; +} + +static void cros_typec_handle_status(struct cros_typec_data *typec, int port_num) +{ + struct ec_response_typec_status resp; + struct ec_params_typec_status req = { + .port = port_num, + }; + int ret; + + ret = cros_typec_ec_command(typec, 0, EC_CMD_TYPEC_STATUS, &req, sizeof(req), + &resp, sizeof(resp)); + if (ret < 0) { + dev_warn(typec->dev, "EC_CMD_TYPEC_STATUS failed for port: %d\n", port_num); + return; + } + + if (typec->ports[port_num]->disc_done) + return; + + /* Handle any events appropriately. 
*/ + if (resp.events & PD_STATUS_EVENT_SOP_DISC_DONE) { + ret = cros_typec_handle_sop_disc(typec, port_num); + if (ret < 0) { + dev_err(typec->dev, "Couldn't parse SOP Disc data, port: %d\n", port_num); + return; + } + + typec->ports[port_num]->disc_done = true; + } +} + static int cros_typec_port_update(struct cros_typec_data *typec, int port_num) { struct ec_params_usb_pd_control req; @@ -608,6 +776,9 @@ static int cros_typec_port_update(struct cros_typec_data *typec, int port_num) cros_typec_set_port_params_v0(typec, port_num, (struct ec_response_usb_pd_control *) &resp); + if (typec->typec_cmd_supported) + cros_typec_handle_status(typec, port_num); + /* Update the switches if they exist, according to requested state */ ret = cros_typec_get_mux_info(typec, port_num, &mux_resp); if (ret < 0) { @@ -656,6 +827,23 @@ static int cros_typec_get_cmd_version(struct cros_typec_data *typec) return 0; } +/* Check the EC feature flags to see if TYPEC_* commands are supported. */ +static int cros_typec_cmds_supported(struct cros_typec_data *typec) +{ + struct ec_response_get_features resp = {}; + int ret; + + ret = cros_typec_ec_command(typec, 0, EC_CMD_GET_FEATURES, NULL, 0, + &resp, sizeof(resp)); + if (ret < 0) { + dev_warn(typec->dev, + "Failed to get features, assuming typec commands unsupported.\n"); + return 0; + } + + return resp.flags[EC_FEATURE_TYPEC_CMD / 32] & EC_FEATURE_MASK_1(EC_FEATURE_TYPEC_CMD); +} + static void cros_typec_port_work(struct work_struct *work) { struct cros_typec_data *typec = container_of(work, struct cros_typec_data, port_work); @@ -715,6 +903,8 @@ static int cros_typec_probe(struct platform_device *pdev) return ret; } + typec->typec_cmd_supported = !!cros_typec_cmds_supported(typec); + ret = cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_PORTS, NULL, 0, &resp, sizeof(resp)); if (ret < 0) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 99f86612f775..dc78a523a69f 100644 --- a/drivers/s390/block/dasd_alias.c 
+++ b/drivers/s390/block/dasd_alias.c @@ -256,7 +256,6 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) return; device->discipline->get_uid(device, &uid); spin_lock_irqsave(&lcu->lock, flags); - list_del_init(&device->alias_list); /* make sure that the workers don't use this device */ if (device == lcu->suc_data.device) { spin_unlock_irqrestore(&lcu->lock, flags); @@ -283,6 +282,7 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) spin_lock_irqsave(&aliastree.lock, flags); spin_lock(&lcu->lock); + list_del_init(&device->alias_list); if (list_empty(&lcu->grouplist) && list_empty(&lcu->active_devices) && list_empty(&lcu->inactive_devices)) { @@ -462,11 +462,19 @@ static int read_unit_address_configuration(struct dasd_device *device, spin_unlock_irqrestore(&lcu->lock, flags); rc = dasd_sleep_on(cqr); - if (rc && !suborder_not_supported(cqr)) { + if (!rc) + goto out; + + if (suborder_not_supported(cqr)) { + /* suborder not supported or device unusable for IO */ + rc = -EOPNOTSUPP; + } else { + /* IO failed but should be retried */ spin_lock_irqsave(&lcu->lock, flags); lcu->flags |= NEED_UAC_UPDATE; spin_unlock_irqrestore(&lcu->lock, flags); } +out: dasd_sfree_request(cqr, cqr->memdev); return rc; } @@ -503,6 +511,14 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) return rc; spin_lock_irqsave(&lcu->lock, flags); + /* + * there is another update needed skip the remaining handling + * the data might already be outdated + * but especially do not add the device to an LCU with pending + * update + */ + if (lcu->flags & NEED_UAC_UPDATE) + goto out; lcu->pav = NO_PAV; for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) { switch (lcu->uac->unit[i].ua_type) { @@ -521,6 +537,7 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) alias_list) { _add_device_to_lcu(lcu, device, refdev); } +out: spin_unlock_irqrestore(&lcu->lock, flags); return 0; } @@ -625,6 +642,7 @@ int 
dasd_alias_add_device(struct dasd_device *device) } if (lcu->flags & UPDATE_PENDING) { list_move(&device->alias_list, &lcu->active_devices); + private->pavgroup = NULL; _schedule_lcu_update(lcu, device); } spin_unlock_irqrestore(&lcu->lock, flags); diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 6caf539091e5..92a6396f8a73 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -9,21 +9,24 @@ menuconfig VDPA if VDPA config VDPA_SIM - tristate "vDPA device simulator" + tristate "vDPA device simulator core" depends on RUNTIME_TESTING_MENU && HAS_DMA select DMA_OPS select VHOST_RING - select GENERIC_NET_UTILS - default n help - vDPA networking device simulator which loop TX traffic back - to RX. This device is used for testing, prototyping and - development of vDPA. + Enable this module to support vDPA device simulators. These devices + are used for testing, prototyping and development of vDPA. + +config VDPA_SIM_NET + tristate "vDPA simulator for networking device" + depends on VDPA_SIM + select GENERIC_NET_UTILS + help + vDPA networking device simulator which loops TX traffic back to RX. config IFCVF tristate "Intel IFC VF vDPA driver" depends on PCI_MSI - default n help This kernel module can drive Intel IFC VF NIC to offload virtio dataplane traffic to hardware. @@ -42,7 +45,6 @@ config MLX5_VDPA_NET tristate "vDPA driver for ConnectX devices" select MLX5_VDPA depends on MLX5_CORE - default n help VDPA network driver for ConnectX6 and newer. 
Provides offloading of virtio net datapath such that descriptors put on the ring will diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 8b4028556cb6..fa1af301cf55 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -417,16 +417,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret) { - IFCVF_ERR(pdev, "No usable DMA confiugration\n"); - return ret; - } - - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - IFCVF_ERR(pdev, - "No usable coherent DMA confiugration\n"); + IFCVF_ERR(pdev, "No usable DMA configuration\n"); return ret; } diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index f1d54814db97..88dde3455bfd 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -479,6 +479,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) { mlx5_cq_set_ci(&mvq->cq.mcq); + + /* make sure CQ consumer update is visible to the hardware before updating + * RX doorbell record.
+ */ + dma_wmb(); rx_post(&mvq->vqqp, num); if (mvq->event_cb.callback) mvq->event_cb.callback(mvq->event_cb.private); diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index a69ffc991e13..c0825650c055 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -89,7 +89,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, if (!vdev) goto err; - err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL); + err = ida_alloc(&vdpa_index_ida, GFP_KERNEL); if (err < 0) goto err_ida; diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile index b40278f65e04..79d4536d347e 100644 --- a/drivers/vdpa/vdpa_sim/Makefile +++ b/drivers/vdpa/vdpa_sim/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o +obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 6a90fdb9cbfc..b3fcc67bfdf0 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * VDPA networking device simulator. + * VDPA device simulator core. * * Copyright (c) 2020, Red Hat Inc. All rights reserved. 
* Author: Jason Wang @@ -11,97 +11,32 @@ #include #include #include -#include -#include #include #include -#include -#include -#include #include -#include -#include -#include #include #include -#include #include -#include -#include + +#include "vdpa_sim.h" #define DRV_VERSION "0.1" #define DRV_AUTHOR "Jason Wang " -#define DRV_DESC "vDPA Device Simulator" +#define DRV_DESC "vDPA Device Simulator core" #define DRV_LICENSE "GPL v2" static int batch_mapping = 1; module_param(batch_mapping, int, 0444); MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); -static char *macaddr; -module_param(macaddr, charp, 0); -MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); - -struct vdpasim_virtqueue { - struct vringh vring; - struct vringh_kiov iov; - unsigned short head; - bool ready; - u64 desc_addr; - u64 device_addr; - u64 driver_addr; - u32 num; - void *private; - irqreturn_t (*cb)(void *data); -}; +static int max_iotlb_entries = 2048; +module_param(max_iotlb_entries, int, 0444); +MODULE_PARM_DESC(max_iotlb_entries, + "Maximum number of iotlb entries. 0 means unlimited. 
(default: 2048)"); #define VDPASIM_QUEUE_ALIGN PAGE_SIZE #define VDPASIM_QUEUE_MAX 256 -#define VDPASIM_DEVICE_ID 0x1 #define VDPASIM_VENDOR_ID 0 -#define VDPASIM_VQ_NUM 0x2 -#define VDPASIM_NAME "vdpasim-netdev" - -static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_ACCESS_PLATFORM) | - (1ULL << VIRTIO_NET_F_MAC); - -/* State of each vdpasim device */ -struct vdpasim { - struct vdpa_device vdpa; - struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM]; - struct work_struct work; - /* spinlock to synchronize virtqueue state */ - spinlock_t lock; - struct virtio_net_config config; - struct vhost_iotlb *iommu; - void *buffer; - u32 status; - u32 generation; - u64 features; - /* spinlock to synchronize iommu table */ - spinlock_t iommu_lock; -}; - -/* TODO: cross-endian support */ -static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) -{ - return virtio_legacy_is_little_endian() || - (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); -} - -static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) -{ - return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); -} - -static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) -{ - return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); -} - -static struct vdpasim *vdpasim_dev; static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) { @@ -115,20 +50,34 @@ static struct vdpasim *dev_to_sim(struct device *dev) return vdpa_to_sim(vdpa); } +static void vdpasim_vq_notify(struct vringh *vring) +{ + struct vdpasim_virtqueue *vq = + container_of(vring, struct vdpasim_virtqueue, vring); + + if (!vq->cb) + return; + + vq->cb(vq->private); +} + static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - vringh_init_iotlb(&vq->vring, vdpasim_features, + vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, VDPASIM_QUEUE_MAX, 
false, (struct vring_desc *)(uintptr_t)vq->desc_addr, (struct vring_avail *) (uintptr_t)vq->driver_addr, (struct vring_used *) (uintptr_t)vq->device_addr); + + vq->vring.notify = vdpasim_vq_notify; } -static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) +static void vdpasim_vq_reset(struct vdpasim *vdpasim, + struct vdpasim_virtqueue *vq) { vq->ready = false; vq->desc_addr = 0; @@ -136,16 +85,18 @@ static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) vq->device_addr = 0; vq->cb = NULL; vq->private = NULL; - vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, - false, NULL, NULL, NULL); + vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, + VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL); + + vq->vring.notify = NULL; } static void vdpasim_reset(struct vdpasim *vdpasim) { int i; - for (i = 0; i < VDPASIM_VQ_NUM; i++) - vdpasim_vq_reset(&vdpasim->vqs[i]); + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) + vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]); spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); @@ -156,80 +107,6 @@ static void vdpasim_reset(struct vdpasim *vdpasim) ++vdpasim->generation; } -static void vdpasim_work(struct work_struct *work) -{ - struct vdpasim *vdpasim = container_of(work, struct - vdpasim, work); - struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; - struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; - ssize_t read, write; - size_t total_write; - int pkts = 0; - int err; - - spin_lock(&vdpasim->lock); - - if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) - goto out; - - if (!txq->ready || !rxq->ready) - goto out; - - while (true) { - total_write = 0; - err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL, - &txq->head, GFP_ATOMIC); - if (err <= 0) - break; - - err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov, - &rxq->head, GFP_ATOMIC); - if (err <= 0) { - vringh_complete_iotlb(&txq->vring, txq->head, 0); - break; - } - - while (true) { - read = vringh_iov_pull_iotlb(&txq->vring, 
&txq->iov, - vdpasim->buffer, - PAGE_SIZE); - if (read <= 0) - break; - - write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov, - vdpasim->buffer, read); - if (write <= 0) - break; - - total_write += write; - } - - /* Make sure data is wrote before advancing index */ - smp_wmb(); - - vringh_complete_iotlb(&txq->vring, txq->head, 0); - vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); - - /* Make sure used is visible before rasing the interrupt. */ - smp_wmb(); - - local_bh_disable(); - if (txq->cb) - txq->cb(txq->private); - if (rxq->cb) - rxq->cb(rxq->private); - local_bh_enable(); - - if (++pkts > 4) { - schedule_work(&vdpasim->work); - goto out; - } - } - -out: - spin_unlock(&vdpasim->lock); -} - static int dir_to_perm(enum dma_data_direction dir) { int perm = -EFAULT; @@ -342,26 +219,28 @@ static const struct dma_map_ops vdpasim_dma_ops = { .free = vdpasim_free_coherent, }; -static const struct vdpa_config_ops vdpasim_net_config_ops; -static const struct vdpa_config_ops vdpasim_net_batch_config_ops; +static const struct vdpa_config_ops vdpasim_config_ops; +static const struct vdpa_config_ops vdpasim_batch_config_ops; -static struct vdpasim *vdpasim_create(void) +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) { const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; struct device *dev; - int ret = -ENOMEM; + int i, ret = -ENOMEM; if (batch_mapping) - ops = &vdpasim_net_batch_config_ops; + ops = &vdpasim_batch_config_ops; else - ops = &vdpasim_net_config_ops; + ops = &vdpasim_config_ops; - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM); + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, + dev_attr->nvqs); if (!vdpasim) goto err_alloc; - INIT_WORK(&vdpasim->work, vdpasim_work); + vdpasim->dev_attr = *dev_attr; + INIT_WORK(&vdpasim->work, dev_attr->work_fn); spin_lock_init(&vdpasim->lock); spin_lock_init(&vdpasim->iommu_lock); @@ -371,31 +250,27 @@ static struct vdpasim *vdpasim_create(void) 
goto err_iommu; set_dma_ops(dev, &vdpasim_dma_ops); - vdpasim->iommu = vhost_iotlb_alloc(2048, 0); + vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL); + if (!vdpasim->config) + goto err_iommu; + + vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue), + GFP_KERNEL); + if (!vdpasim->vqs) + goto err_iommu; + + vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0); if (!vdpasim->iommu) goto err_iommu; - vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL); if (!vdpasim->buffer) goto err_iommu; - if (macaddr) { - mac_pton(macaddr, vdpasim->config.mac); - if (!is_valid_ether_addr(vdpasim->config.mac)) { - ret = -EADDRNOTAVAIL; - goto err_iommu; - } - } else { - eth_random_addr(vdpasim->config.mac); - } - - vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); - vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); + for (i = 0; i < dev_attr->nvqs; i++) + vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); vdpasim->vdpa.dma_dev = dev; - ret = vdpa_register_device(&vdpasim->vdpa); - if (ret) - goto err_iommu; return vdpasim; @@ -404,6 +279,7 @@ err_iommu: err_alloc: return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(vdpasim_create); static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, u64 desc_area, u64 driver_area, @@ -498,28 +374,21 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) static u64 vdpasim_get_features(struct vdpa_device *vdpa) { - return vdpasim_features; + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.supported_features; } static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct virtio_net_config *config = &vdpasim->config; /* DMA mapping must be done by driver */ if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) return -EINVAL; - vdpasim->features = features & vdpasim_features; + vdpasim->features = features & 
vdpasim->dev_attr.supported_features; - /* We generally only know whether guest is using the legacy interface - * here, so generally that's the earliest we can set config fields. - * Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which - * implies VIRTIO_F_VERSION_1, but let's not try to be clever here. - */ - - config->mtu = cpu_to_vdpasim16(vdpasim, 1500); - config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); return 0; } @@ -536,7 +405,9 @@ static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) { - return VDPASIM_DEVICE_ID; + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.id; } static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) @@ -572,14 +443,27 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - if (offset + len < sizeof(struct virtio_net_config)) - memcpy(buf, (u8 *)&vdpasim->config + offset, len); + if (offset + len > vdpasim->dev_attr.config_size) + return; + + if (vdpasim->dev_attr.get_config) + vdpasim->dev_attr.get_config(vdpasim, vdpasim->config); + + memcpy(buf, vdpasim->config + offset, len); } static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, const void *buf, unsigned int len) { - /* No writable config supportted by vdpasim */ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (offset + len > vdpasim->dev_attr.config_size) + return; + + memcpy(vdpasim->config + offset, buf, len); + + if (vdpasim->dev_attr.set_config) + vdpasim->dev_attr.set_config(vdpasim, vdpasim->config); } static u32 vdpasim_get_generation(struct vdpa_device *vdpa) @@ -656,12 +540,14 @@ static void vdpasim_free(struct vdpa_device *vdpa) struct vdpasim *vdpasim = vdpa_to_sim(vdpa); cancel_work_sync(&vdpasim->work); - kfree(vdpasim->buffer); + kvfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); + kfree(vdpasim->vqs); + 
kfree(vdpasim->config); } -static const struct vdpa_config_ops vdpasim_net_config_ops = { +static const struct vdpa_config_ops vdpasim_config_ops = { .set_vq_address = vdpasim_set_vq_address, .set_vq_num = vdpasim_set_vq_num, .kick_vq = vdpasim_kick_vq, @@ -688,7 +574,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = { .free = vdpasim_free, }; -static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { +static const struct vdpa_config_ops vdpasim_batch_config_ops = { .set_vq_address = vdpasim_set_vq_address, .set_vq_num = vdpasim_set_vq_num, .kick_vq = vdpasim_kick_vq, @@ -714,26 +600,6 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { .free = vdpasim_free, }; -static int __init vdpasim_dev_init(void) -{ - vdpasim_dev = vdpasim_create(); - - if (!IS_ERR(vdpasim_dev)) - return 0; - - return PTR_ERR(vdpasim_dev); -} - -static void __exit vdpasim_dev_exit(void) -{ - struct vdpa_device *vdpa = &vdpasim_dev->vdpa; - - vdpa_unregister_device(vdpa); -} - -module_init(vdpasim_dev_init) -module_exit(vdpasim_dev_exit) - MODULE_VERSION(DRV_VERSION); MODULE_LICENSE(DRV_LICENSE); MODULE_AUTHOR(DRV_AUTHOR); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h new file mode 100644 index 000000000000..b02142293d5b --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Red Hat Inc. All rights reserved. 
+ */ + +#ifndef _VDPA_SIM_H +#define _VDPA_SIM_H + +#include +#include +#include +#include +#include + +#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM)) + +struct vdpasim; + +struct vdpasim_virtqueue { + struct vringh vring; + struct vringh_kiov in_iov; + struct vringh_kiov out_iov; + unsigned short head; + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num; + void *private; + irqreturn_t (*cb)(void *data); +}; + +struct vdpasim_dev_attr { + u64 supported_features; + size_t config_size; + size_t buffer_size; + int nvqs; + u32 id; + + work_func_t work_fn; + void (*get_config)(struct vdpasim *vdpasim, void *config); + void (*set_config)(struct vdpasim *vdpasim, const void *config); +}; + +/* State of each vdpasim device */ +struct vdpasim { + struct vdpa_device vdpa; + struct vdpasim_virtqueue *vqs; + struct work_struct work; + struct vdpasim_dev_attr dev_attr; + /* spinlock to synchronize virtqueue state */ + spinlock_t lock; + /* virtio config according to device type */ + void *config; + struct vhost_iotlb *iommu; + void *buffer; + u32 status; + u32 generation; + u64 features; + /* spinlock to synchronize iommu table */ + spinlock_t iommu_lock; +}; + +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr); + +/* TODO: cross-endian support */ +static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) +{ + return virtio_legacy_is_little_endian() || + (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); +} + +static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) +{ + return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) +{ + return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); +} + +static inline u32 vdpasim32_to_cpu(struct vdpasim *vdpasim, __virtio32 val) +{ + return 
__virtio32_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio32 cpu_to_vdpasim32(struct vdpasim *vdpasim, u32 val) +{ + return __cpu_to_virtio32(vdpasim_is_little_endian(vdpasim), val); +} + +static inline u64 vdpasim64_to_cpu(struct vdpasim *vdpasim, __virtio64 val) +{ + return __virtio64_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio64 cpu_to_vdpasim64(struct vdpasim *vdpasim, u64 val) +{ + return __cpu_to_virtio64(vdpasim_is_little_endian(vdpasim), val); +} + +#endif diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c new file mode 100644 index 000000000000..c10b6981fdab --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA simulator for networking device. + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vdpa_sim.h" + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Jason Wang " +#define DRV_DESC "vDPA Device Simulator for networking device" +#define DRV_LICENSE "GPL v2" + +#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ + (1ULL << VIRTIO_NET_F_MAC)) + +#define VDPASIM_NET_VQ_NUM 2 + +static char *macaddr; +module_param(macaddr, charp, 0); +MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); + +u8 macaddr_buf[ETH_ALEN]; + +static struct vdpasim *vdpasim_net_dev; + +static void vdpasim_net_work(struct work_struct *work) +{ + struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); + struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; + struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; + ssize_t read, write; + size_t total_write; + int pkts = 0; + int err; + + spin_lock(&vdpasim->lock); + + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + + if (!txq->ready || !rxq->ready) + goto out; + + while (true) { + total_write = 0; + 
err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, + &txq->head, GFP_ATOMIC); + if (err <= 0) + break; + + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, + &rxq->head, GFP_ATOMIC); + if (err <= 0) { + vringh_complete_iotlb(&txq->vring, txq->head, 0); + break; + } + + while (true) { + read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, + vdpasim->buffer, + PAGE_SIZE); + if (read <= 0) + break; + + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, + vdpasim->buffer, read); + if (write <= 0) + break; + + total_write += write; + } + + /* Make sure data is written before advancing index */ + smp_wmb(); + + vringh_complete_iotlb(&txq->vring, txq->head, 0); + vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); + + /* Make sure used is visible before raising the interrupt. */ + smp_wmb(); + + local_bh_disable(); + if (vringh_need_notify_iotlb(&txq->vring) > 0) + vringh_notify(&txq->vring); + if (vringh_need_notify_iotlb(&rxq->vring) > 0) + vringh_notify(&rxq->vring); + local_bh_enable(); + + if (++pkts > 4) { + schedule_work(&vdpasim->work); + goto out; + } + } + +out: + spin_unlock(&vdpasim->lock); +} + +static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_net_config *net_config = + (struct virtio_net_config *)config; + + net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); + net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); + memcpy(net_config->mac, macaddr_buf, ETH_ALEN); +} + +static int __init vdpasim_net_init(void) +{ + struct vdpasim_dev_attr dev_attr = {}; + int ret; + + if (macaddr) { + mac_pton(macaddr, macaddr_buf); + if (!is_valid_ether_addr(macaddr_buf)) { + ret = -EADDRNOTAVAIL; + goto out; + } + } else { + eth_random_addr(macaddr_buf); + } + + dev_attr.id = VIRTIO_ID_NET; + dev_attr.supported_features = VDPASIM_NET_FEATURES; + dev_attr.nvqs = VDPASIM_NET_VQ_NUM; + dev_attr.config_size = sizeof(struct virtio_net_config); + dev_attr.get_config =
vdpasim_net_get_config; + dev_attr.work_fn = vdpasim_net_work; + dev_attr.buffer_size = PAGE_SIZE; + + vdpasim_net_dev = vdpasim_create(&dev_attr); + if (IS_ERR(vdpasim_net_dev)) { + ret = PTR_ERR(vdpasim_net_dev); + goto out; + } + + ret = vdpa_register_device(&vdpasim_net_dev->vdpa); + if (ret) + goto put_dev; + + return 0; + +put_dev: + put_device(&vdpasim_net_dev->vdpa.dev); +out: + return ret; +} + +static void __exit vdpasim_net_exit(void) +{ + struct vdpa_device *vdpa = &vdpasim_net_dev->vdpa; + + vdpa_unregister_device(vdpa); +} + +module_init(vdpasim_net_init); +module_exit(vdpasim_net_exit); + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 6ff8a5096691..4ce9f00ae10e 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1643,7 +1643,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (!vhost_vq_is_setup(vq)) continue; - if (vhost_scsi_setup_vq_cmds(vq, vq->num)) + ret = vhost_scsi_setup_vq_cmds(vq, vq->num); + if (ret) goto destroy_vq_cmds; } diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 29ed4173f04e..ef688c8c0e0e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -245,14 +245,10 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return -EFAULT; if (vhost_vdpa_config_validate(v, &config)) return -EINVAL; - buf = kvzalloc(config.len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - if (copy_from_user(buf, c->buf, config.len)) { - kvfree(buf); - return -EFAULT; - } + buf = vmemdup_user(c->buf, config.len); + if (IS_ERR(buf)) + return PTR_ERR(buf); ops->set_config(vdpa, config.off, buf, config.len); diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 181e2f18beae..9fc9ec4a25f5 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -27,20 +27,74 @@ static bool unplug_online = true; module_param(unplug_online, bool, 0644); 
MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); -enum virtio_mem_mb_state { +static bool force_bbm; +module_param(force_bbm, bool, 0444); +MODULE_PARM_DESC(force_bbm, + "Force Big Block Mode. Default is 0 (auto-selection)"); + +static unsigned long bbm_block_size; +module_param(bbm_block_size, ulong, 0444); +MODULE_PARM_DESC(bbm_block_size, + "Big Block size in bytes. Default is 0 (auto-detection)."); + +static bool bbm_safe_unplug = true; +module_param(bbm_safe_unplug, bool, 0444); +MODULE_PARM_DESC(bbm_safe_unplug, + "Use a safe unplug mechanism in BBM, avoiding long/endless loops"); + +/* + * virtio-mem currently supports the following modes of operation: + * + * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The + * size of a Sub Block (SB) is determined based on the device block size, the + * pageblock size, and the maximum allocation granularity of the buddy. + * Subblocks within a Linux memory block might either be plugged or unplugged. + * Memory is added/removed to Linux MM in Linux memory block granularity. + * + * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks. + * Memory is added/removed to Linux MM in Big Block granularity. + * + * The mode is determined automatically based on the Linux memory block size + * and the device block size. + * + * User space / core MM (auto onlining) is responsible for onlining added + * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are + * always onlined separately, and all memory within a Linux memory block is + * onlined to the same zone - virtio-mem relies on this behavior. + */ + +/* + * State of a Linux memory block in SBM. + */ +enum virtio_mem_sbm_mb_state { /* Unplugged, not added to Linux. Can be reused later. */ - VIRTIO_MEM_MB_STATE_UNUSED = 0, + VIRTIO_MEM_SBM_MB_UNUSED = 0, /* (Partially) plugged, not added to Linux. Error on add_memory(). 
*/ - VIRTIO_MEM_MB_STATE_PLUGGED, + VIRTIO_MEM_SBM_MB_PLUGGED, /* Fully plugged, fully added to Linux, offline. */ - VIRTIO_MEM_MB_STATE_OFFLINE, + VIRTIO_MEM_SBM_MB_OFFLINE, /* Partially plugged, fully added to Linux, offline. */ - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, /* Fully plugged, fully added to Linux, online. */ - VIRTIO_MEM_MB_STATE_ONLINE, + VIRTIO_MEM_SBM_MB_ONLINE, /* Partially plugged, fully added to Linux, online. */ - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL, - VIRTIO_MEM_MB_STATE_COUNT + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL, + VIRTIO_MEM_SBM_MB_COUNT +}; + +/* + * State of a Big Block (BB) in BBM, covering 1..X Linux memory blocks. + */ +enum virtio_mem_bbm_bb_state { + /* Unplugged, not added to Linux. Can be reused later. */ + VIRTIO_MEM_BBM_BB_UNUSED = 0, + /* Plugged, not added to Linux. Error on add_memory(). */ + VIRTIO_MEM_BBM_BB_PLUGGED, + /* Plugged and added to Linux. */ + VIRTIO_MEM_BBM_BB_ADDED, + /* All online parts are fake-offline, ready to remove. */ + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE, + VIRTIO_MEM_BBM_BB_COUNT }; struct virtio_mem { @@ -51,6 +105,7 @@ struct virtio_mem { /* Workqueue that processes the plug/unplug requests. */ struct work_struct wq; + atomic_t wq_active; atomic_t config_changed; /* Virtqueue for guest->host requests. */ @@ -70,27 +125,13 @@ struct virtio_mem { /* The device block size (for communicating with the device). */ uint64_t device_block_size; - /* The translated node id. NUMA_NO_NODE in case not specified. */ + /* The determined node id for all memory of the device. */ int nid; /* Physical start address of the memory region. */ uint64_t addr; /* Maximum region size in bytes. */ uint64_t region_size; - /* The subblock size. */ - uint64_t subblock_size; - /* The number of subblocks per memory block. */ - uint32_t nb_sb_per_mb; - - /* Id of the first memory block of this device. */ - unsigned long first_mb_id; - /* Id of the last memory block of this device. 
*/ - unsigned long last_mb_id; - /* Id of the last usable memory block of this device. */ - unsigned long last_usable_mb_id; - /* Id of the next memory bock to prepare when needed. */ - unsigned long next_mb_id; - /* The parent resource for all memory added via this device. */ struct resource *parent_resource; /* @@ -99,31 +140,79 @@ struct virtio_mem { */ const char *resource_name; - /* Summary of all memory block states. */ - unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT]; -#define VIRTIO_MEM_NB_OFFLINE_THRESHOLD 10 - /* - * One byte state per memory block. - * - * Allocated via vmalloc(). When preparing new blocks, resized - * (alloc+copy+free) when needed (crossing pages with the next mb). - * (when crossing pages). - * - * With 128MB memory blocks, we have states for 512GB of memory in one - * page. + * We don't want to add too much memory if it's not getting onlined, + * to avoid running OOM. Besides this threshold, we allow to have at + * least two offline blocks at a time (whatever is bigger). */ - uint8_t *mb_state; +#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD (1024 * 1024 * 1024) + atomic64_t offline_size; + uint64_t offline_threshold; + + /* If set, the driver is in SBM, otherwise in BBM. */ + bool in_sbm; + + union { + struct { + /* Id of the first memory block of this device. */ + unsigned long first_mb_id; + /* Id of the last usable memory block of this device. */ + unsigned long last_usable_mb_id; + /* Id of the next memory block to prepare when needed. */ + unsigned long next_mb_id; + + /* The subblock size. */ + uint64_t sb_size; + /* The number of subblocks per Linux memory block. */ + uint32_t sbs_per_mb; + + /* Summary of all memory block states. */ + unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; + + /* + * One byte state per memory block. Allocated via + * vmalloc(). Resized (alloc+copy+free) on demand. + * + * With 128 MiB memory blocks, we have states for 512 + * GiB of memory in one 4 KiB page. 
+ */ + uint8_t *mb_states; + + /* + * Bitmap: one bit per subblock. Allocated similar to + * sbm.mb_states. + * + * A set bit means the corresponding subblock is + * plugged, otherwise it's unplugged. + * + * With 4 MiB subblocks, we manage 128 GiB of memory + * in one 4 KiB page. + */ + unsigned long *sb_states; + } sbm; + + struct { + /* Id of the first big block of this device. */ + unsigned long first_bb_id; + /* Id of the last usable big block of this device. */ + unsigned long last_usable_bb_id; + /* Id of the next big block to prepare when needed. */ + unsigned long next_bb_id; + + /* Summary of all big block states. */ + unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT]; + + /* One byte state per big block. See sbm.mb_states. */ + uint8_t *bb_states; + + /* The block size used for plugging/adding/removing. */ + uint64_t bb_size; + } bbm; + }; /* - * $nb_sb_per_mb bit per memory block. Handled similar to mb_state. - * - * With 4MB subblocks, we manage 128GB of memory in one page. - */ - unsigned long *sb_bitmap; - - /* - * Mutex that protects the nb_mb_state, mb_state, and sb_bitmap. 
+ * Mutex that protects the sbm.mb_count, sbm.mb_states, + * sbm.sb_states, bbm.bb_count, and bbm.bb_states * * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get @@ -160,6 +249,11 @@ static DEFINE_MUTEX(virtio_mem_mutex); static LIST_HEAD(virtio_mem_devices); static void virtio_mem_online_page_cb(struct page *page, unsigned int order); +static void virtio_mem_fake_offline_going_offline(unsigned long pfn, + unsigned long nr_pages); +static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, + unsigned long nr_pages); +static void virtio_mem_retry(struct virtio_mem *vm); /* * Register a virtio-mem device so it will be considered for the online_page @@ -212,6 +306,24 @@ static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id) return mb_id * memory_block_size_bytes(); } +/* + * Calculate the big block id of a given address. + */ +static unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm, + uint64_t addr) +{ + return addr / vm->bbm.bb_size; +} + +/* + * Calculate the physical start address of a given big block id. + */ +static uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm, + unsigned long bb_id) +{ + return bb_id * vm->bbm.bb_size; +} + /* * Calculate the subblock id of a given address. */ @@ -221,89 +333,164 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id); - return (addr - mb_addr) / vm->subblock_size; + return (addr - mb_addr) / vm->sbm.sb_size; } /* - * Set the state of a memory block, taking care of the state counter. + * Set the state of a big block, taking care of the state counter. 
*/ -static void virtio_mem_mb_set_state(struct virtio_mem *vm, unsigned long mb_id, - enum virtio_mem_mb_state state) +static void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm, + unsigned long bb_id, + enum virtio_mem_bbm_bb_state state) { - const unsigned long idx = mb_id - vm->first_mb_id; - enum virtio_mem_mb_state old_state; + const unsigned long idx = bb_id - vm->bbm.first_bb_id; + enum virtio_mem_bbm_bb_state old_state; - old_state = vm->mb_state[idx]; - vm->mb_state[idx] = state; + old_state = vm->bbm.bb_states[idx]; + vm->bbm.bb_states[idx] = state; - BUG_ON(vm->nb_mb_state[old_state] == 0); - vm->nb_mb_state[old_state]--; - vm->nb_mb_state[state]++; + BUG_ON(vm->bbm.bb_count[old_state] == 0); + vm->bbm.bb_count[old_state]--; + vm->bbm.bb_count[state]++; } /* - * Get the state of a memory block. + * Get the state of a big block. */ -static enum virtio_mem_mb_state virtio_mem_mb_get_state(struct virtio_mem *vm, - unsigned long mb_id) +static enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm, + unsigned long bb_id) { - const unsigned long idx = mb_id - vm->first_mb_id; - - return vm->mb_state[idx]; + return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id]; } /* - * Prepare the state array for the next memory block. + * Prepare the big block state array for the next big block. 
*/ -static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm) +static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm) { - unsigned long old_bytes = vm->next_mb_id - vm->first_mb_id + 1; - unsigned long new_bytes = vm->next_mb_id - vm->first_mb_id + 2; + unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id; + unsigned long new_bytes = old_bytes + 1; int old_pages = PFN_UP(old_bytes); int new_pages = PFN_UP(new_bytes); - uint8_t *new_mb_state; + uint8_t *new_array; - if (vm->mb_state && old_pages == new_pages) + if (vm->bbm.bb_states && old_pages == new_pages) return 0; - new_mb_state = vzalloc(new_pages * PAGE_SIZE); - if (!new_mb_state) + new_array = vzalloc(new_pages * PAGE_SIZE); + if (!new_array) return -ENOMEM; mutex_lock(&vm->hotplug_mutex); - if (vm->mb_state) - memcpy(new_mb_state, vm->mb_state, old_pages * PAGE_SIZE); - vfree(vm->mb_state); - vm->mb_state = new_mb_state; + if (vm->bbm.bb_states) + memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE); + vfree(vm->bbm.bb_states); + vm->bbm.bb_states = new_array; mutex_unlock(&vm->hotplug_mutex); return 0; } -#define virtio_mem_for_each_mb_state(_vm, _mb_id, _state) \ - for (_mb_id = _vm->first_mb_id; \ - _mb_id < _vm->next_mb_id && _vm->nb_mb_state[_state]; \ - _mb_id++) \ - if (virtio_mem_mb_get_state(_vm, _mb_id) == _state) +#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \ + for (_bb_id = vm->bbm.first_bb_id; \ + _bb_id < vm->bbm.next_bb_id && _vm->bbm.bb_count[_state]; \ + _bb_id++) \ + if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) -#define virtio_mem_for_each_mb_state_rev(_vm, _mb_id, _state) \ - for (_mb_id = _vm->next_mb_id - 1; \ - _mb_id >= _vm->first_mb_id && _vm->nb_mb_state[_state]; \ +#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \ + for (_bb_id = vm->bbm.next_bb_id - 1; \ + _bb_id >= vm->bbm.first_bb_id && _vm->bbm.bb_count[_state]; \ + _bb_id--) \ + if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) + +/* 
+ * Set the state of a memory block, taking care of the state counter. + */ +static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, + unsigned long mb_id, uint8_t state) +{ + const unsigned long idx = mb_id - vm->sbm.first_mb_id; + uint8_t old_state; + + old_state = vm->sbm.mb_states[idx]; + vm->sbm.mb_states[idx] = state; + + BUG_ON(vm->sbm.mb_count[old_state] == 0); + vm->sbm.mb_count[old_state]--; + vm->sbm.mb_count[state]++; +} + +/* + * Get the state of a memory block. + */ +static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, + unsigned long mb_id) +{ + const unsigned long idx = mb_id - vm->sbm.first_mb_id; + + return vm->sbm.mb_states[idx]; +} + +/* + * Prepare the state array for the next memory block. + */ +static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) +{ + int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id); + int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1); + uint8_t *new_array; + + if (vm->sbm.mb_states && old_pages == new_pages) + return 0; + + new_array = vzalloc(new_pages * PAGE_SIZE); + if (!new_array) + return -ENOMEM; + + mutex_lock(&vm->hotplug_mutex); + if (vm->sbm.mb_states) + memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE); + vfree(vm->sbm.mb_states); + vm->sbm.mb_states = new_array; + mutex_unlock(&vm->hotplug_mutex); + + return 0; +} + +#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \ + for (_mb_id = _vm->sbm.first_mb_id; \ + _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state]; \ + _mb_id++) \ + if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) + +#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \ + for (_mb_id = _vm->sbm.next_mb_id - 1; \ + _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id--) \ - if (virtio_mem_mb_get_state(_vm, _mb_id) == _state) + if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) + +/* + * Calculate the bit number in the subblock bitmap for the given subblock + * inside 
the given memory block. + */ +static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, + unsigned long mb_id, int sb_id) +{ + return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id; +} /* * Mark all selected subblocks plugged. * * Will not modify the state of the memory block. */ -static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); - __bitmap_set(vm->sb_bitmap, bit, count); + __bitmap_set(vm->sbm.sb_states, bit, count); } /* @@ -311,105 +498,114 @@ static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm, * * Will not modify the state of the memory block. */ -static void virtio_mem_mb_set_sb_unplugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); - __bitmap_clear(vm->sb_bitmap, bit, count); + __bitmap_clear(vm->sbm.sb_states, bit, count); } /* * Test if all selected subblocks are plugged. 
*/ -static bool virtio_mem_mb_test_sb_plugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); if (count == 1) - return test_bit(bit, vm->sb_bitmap); + return test_bit(bit, vm->sbm.sb_states); /* TODO: Helper similar to bitmap_set() */ - return find_next_zero_bit(vm->sb_bitmap, bit + count, bit) >= + return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >= bit + count; } /* * Test if all selected subblocks are unplugged. */ -static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); /* TODO: Helper similar to bitmap_set() */ - return find_next_bit(vm->sb_bitmap, bit + count, bit) >= bit + count; + return find_next_bit(vm->sbm.sb_states, bit + count, bit) >= + bit + count; } /* - * Find the first unplugged subblock. Returns vm->nb_sb_per_mb in case there is + * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is * none. */ -static int virtio_mem_mb_first_unplugged_sb(struct virtio_mem *vm, +static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, unsigned long mb_id) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb; + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0); - return find_next_zero_bit(vm->sb_bitmap, bit + vm->nb_sb_per_mb, bit) - - bit; + return find_next_zero_bit(vm->sbm.sb_states, + bit + vm->sbm.sbs_per_mb, bit) - bit; } /* * Prepare the subblock bitmap for the next memory block. 
*/ -static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) +static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) { - const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id; - const unsigned long old_nb_bits = old_nb_mb * vm->nb_sb_per_mb; - const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->nb_sb_per_mb; + const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id; + const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb; + const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb; int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long)); - unsigned long *new_sb_bitmap, *old_sb_bitmap; + unsigned long *new_bitmap, *old_bitmap; - if (vm->sb_bitmap && old_pages == new_pages) + if (vm->sbm.sb_states && old_pages == new_pages) return 0; - new_sb_bitmap = vzalloc(new_pages * PAGE_SIZE); - if (!new_sb_bitmap) + new_bitmap = vzalloc(new_pages * PAGE_SIZE); + if (!new_bitmap) return -ENOMEM; mutex_lock(&vm->hotplug_mutex); - if (new_sb_bitmap) - memcpy(new_sb_bitmap, vm->sb_bitmap, old_pages * PAGE_SIZE); + if (new_bitmap) + memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE); - old_sb_bitmap = vm->sb_bitmap; - vm->sb_bitmap = new_sb_bitmap; + old_bitmap = vm->sbm.sb_states; + vm->sbm.sb_states = new_bitmap; mutex_unlock(&vm->hotplug_mutex); - vfree(old_sb_bitmap); + vfree(old_bitmap); return 0; } /* - * Try to add a memory block to Linux. This will usually only fail - * if out of memory. + * Test if we could add memory without creating too much offline memory - + * to avoid running OOM if memory is getting onlined deferred. + */ +static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size) +{ + if (WARN_ON_ONCE(size > vm->offline_threshold)) + return false; + + return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold; +} + +/* + * Try adding memory to Linux. 
Will usually only fail if out of memory. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, + uint64_t size) { - const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; - - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); + int rc; /* * When force-unloading the driver and we still have memory added to @@ -422,53 +618,155 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) return -ENOMEM; } - dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); - return add_memory_driver_managed(nid, addr, memory_block_size_bytes(), - vm->resource_name, - MEMHP_MERGE_RESOURCE); + dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + /* Memory might get onlined immediately. */ + atomic64_add(size, &vm->offline_size); + rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name, + MEMHP_MERGE_RESOURCE); + if (rc) { + atomic64_sub(size, &vm->offline_size); + dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); + /* + * TODO: Linux MM does not properly clean up yet in all cases + * where adding of memory failed - especially on -ENOMEM. + */ + } + return rc; } /* - * Try to remove a memory block from Linux. Will only fail if the memory block - * is not offline. + * See virtio_mem_add_memory(): Try adding a single Linux memory block. + */ +static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id) +{ + const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + + return virtio_mem_add_memory(vm, addr, size); +} + +/* + * See virtio_mem_add_memory(): Try adding a big block. 
+ */ +static int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_add_memory(vm, addr, size); +} + +/* + * Try removing memory from Linux. Will only fail if memory blocks aren't + * offline. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr, + uint64_t size) { - const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; + int rc; - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); - - dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); - return remove_memory(nid, addr, memory_block_size_bytes()); + dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + rc = remove_memory(vm->nid, addr, size); + if (!rc) { + atomic64_sub(size, &vm->offline_size); + /* + * We might have freed up memory we can now unplug, retry + * immediately instead of waiting. + */ + virtio_mem_retry(vm); + } else { + dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc); + } + return rc; } /* - * Try to offline and remove a memory block from Linux. + * See virtio_mem_remove_memory(): Try removing a single Linux memory block. + */ +static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id) +{ + const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + + return virtio_mem_remove_memory(vm, addr, size); +} + +/* + * See virtio_mem_remove_memory(): Try to remove all Linux memory blocks covered + * by the big block. 
+ */ +static int virtio_mem_bbm_remove_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_remove_memory(vm, addr, size); +} + +/* + * Try offlining and removing memory from Linux. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, - unsigned long mb_id) +static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, + uint64_t addr, + uint64_t size) +{ + int rc; + + dev_dbg(&vm->vdev->dev, + "offlining and removing memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + + rc = offline_and_remove_memory(vm->nid, addr, size); + if (!rc) { + atomic64_sub(size, &vm->offline_size); + /* + * We might have freed up memory we can now unplug, retry + * immediately instead of waiting. + */ + virtio_mem_retry(vm); + } else { + dev_dbg(&vm->vdev->dev, + "offlining and removing memory failed: %d\n", rc); + } + return rc; +} + +/* + * See virtio_mem_offline_and_remove_memory(): Try offlining and removing + * a single Linux memory block. + */ +static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, + unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; + const uint64_t size = memory_block_size_bytes(); - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); + return virtio_mem_offline_and_remove_memory(vm, addr, size); +} - dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", - mb_id); - return offline_and_remove_memory(nid, addr, memory_block_size_bytes()); +/* + * See virtio_mem_offline_and_remove_memory(): Try to offline and remove + * all Linux memory blocks covered by the big block. 
+ */ +static int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_offline_and_remove_memory(vm, addr, size); } /* @@ -499,31 +797,28 @@ static int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id) * Test if a virtio-mem device overlaps with the given range. Can be called * from (notifier) callbacks lockless. */ -static bool virtio_mem_overlaps_range(struct virtio_mem *vm, - unsigned long start, unsigned long size) +static bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start, + uint64_t size) { - unsigned long dev_start = virtio_mem_mb_id_to_phys(vm->first_mb_id); - unsigned long dev_end = virtio_mem_mb_id_to_phys(vm->last_mb_id) + - memory_block_size_bytes(); - - return start < dev_end && dev_start < start + size; + return start < vm->addr + vm->region_size && vm->addr < start + size; } /* - * Test if a virtio-mem device owns a memory block. Can be called from + * Test if a virtio-mem device contains a given range. Can be called from * (notifier) callbacks lockless. 
*/ -static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id) +static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, + uint64_t size) { - return mb_id >= vm->first_mb_id && mb_id <= vm->last_mb_id; + return start >= vm->addr && start + size <= vm->addr + vm->region_size; } -static int virtio_mem_notify_going_online(struct virtio_mem *vm, - unsigned long mb_id) +static int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm, + unsigned long mb_id) { - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: - case VIRTIO_MEM_MB_STATE_OFFLINE: + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: + case VIRTIO_MEM_SBM_MB_OFFLINE: return NOTIFY_OK; default: break; @@ -533,106 +828,98 @@ static int virtio_mem_notify_going_online(struct virtio_mem *vm, return NOTIFY_BAD; } -static void virtio_mem_notify_offline(struct virtio_mem *vm, - unsigned long mb_id) +static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm, + unsigned long mb_id) { - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); break; - case VIRTIO_MEM_MB_STATE_ONLINE: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + case VIRTIO_MEM_SBM_MB_ONLINE: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); break; default: BUG(); break; } +} +static void virtio_mem_sbm_notify_online(struct virtio_mem *vm, + unsigned long mb_id) +{ + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL); + break; + case VIRTIO_MEM_SBM_MB_OFFLINE: + 
virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE); + break; + default: + BUG(); + break; + } +} + +static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm, + unsigned long mb_id) +{ + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); + unsigned long pfn; + int sb_id; + + for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) + continue; + pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + + sb_id * vm->sbm.sb_size); + virtio_mem_fake_offline_going_offline(pfn, nr_pages); + } +} + +static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, + unsigned long mb_id) +{ + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); + unsigned long pfn; + int sb_id; + + for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) + continue; + pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + + sb_id * vm->sbm.sb_size); + virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); + } +} + +static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm, + unsigned long bb_id, + unsigned long pfn, + unsigned long nr_pages) +{ /* - * Trigger the workqueue, maybe we can now unplug memory. Also, - * when we offline and remove a memory block, this will re-trigger - * us immediately - which is often nice because the removal of - * the memory block (e.g., memmap) might have freed up memory - * on other memory blocks we manage. + * When marked as "fake-offline", all online memory of this device block + * is allocated by us. Otherwise, we don't have any memory allocated. 
*/ - virtio_mem_retry(vm); + if (virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) + return; + virtio_mem_fake_offline_going_offline(pfn, nr_pages); } -static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) +static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm, + unsigned long bb_id, + unsigned long pfn, + unsigned long nr_pages) { - unsigned long nb_offline; - - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL); - break; - case VIRTIO_MEM_MB_STATE_OFFLINE: - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE); - break; - default: - BUG(); - break; - } - nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] + - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL]; - - /* see if we can add new blocks now that we onlined one block */ - if (nb_offline == VIRTIO_MEM_NB_OFFLINE_THRESHOLD - 1) - virtio_mem_retry(vm); -} - -static void virtio_mem_notify_going_offline(struct virtio_mem *vm, - unsigned long mb_id) -{ - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); - struct page *page; - unsigned long pfn; - int sb_id, i; - - for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) - continue; - /* - * Drop our reference to the pages so the memory can get - * offlined and add the unplugged pages to the managed - * page counters (so offlining code can correctly subtract - * them again). 
- */ - pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); - adjust_managed_page_count(pfn_to_page(pfn), nr_pages); - for (i = 0; i < nr_pages; i++) { - page = pfn_to_page(pfn + i); - if (WARN_ON(!page_ref_dec_and_test(page))) - dump_page(page, "unplugged page referenced"); - } - } -} - -static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm, - unsigned long mb_id) -{ - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); - unsigned long pfn; - int sb_id, i; - - for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) - continue; - /* - * Get the reference we dropped when going offline and - * subtract the unplugged pages from the managed page - * counters. - */ - pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); - adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); - for (i = 0; i < nr_pages; i++) - page_ref_inc(pfn_to_page(pfn + i)); - } + if (virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) + return; + virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); } /* @@ -648,20 +935,33 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, struct memory_notify *mhp = arg; const unsigned long start = PFN_PHYS(mhp->start_pfn); const unsigned long size = PFN_PHYS(mhp->nr_pages); - const unsigned long mb_id = virtio_mem_phys_to_mb_id(start); int rc = NOTIFY_OK; + unsigned long id; if (!virtio_mem_overlaps_range(vm, start, size)) return NOTIFY_DONE; - /* - * Memory is onlined/offlined in memory block granularity. We cannot - * cross virtio-mem device boundaries and memory block boundaries. Bail - * out if this ever changes. 
- */ - if (WARN_ON_ONCE(size != memory_block_size_bytes() || - !IS_ALIGNED(start, memory_block_size_bytes()))) - return NOTIFY_BAD; + if (vm->in_sbm) { + id = virtio_mem_phys_to_mb_id(start); + /* + * In SBM, we add memory in separate memory blocks - we expect + * it to be onlined/offlined in the same granularity. Bail out + * if this ever changes. + */ + if (WARN_ON_ONCE(size != memory_block_size_bytes() || + !IS_ALIGNED(start, memory_block_size_bytes()))) + return NOTIFY_BAD; + } else { + id = virtio_mem_phys_to_bb_id(vm, start); + /* + * In BBM, we only care about onlining/offlining happening + * within a single big block, we don't care about the + * actual granularity as we don't track individual Linux + * memory blocks. + */ + if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1))) + return NOTIFY_BAD; + } /* * Avoid circular locking lockdep warnings. We lock the mutex @@ -680,7 +980,12 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - virtio_mem_notify_going_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_going_offline(vm, id); + else + virtio_mem_bbm_notify_going_offline(vm, id, + mhp->start_pfn, + mhp->nr_pages); break; case MEM_GOING_ONLINE: mutex_lock(&vm->hotplug_mutex); @@ -690,22 +995,51 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - rc = virtio_mem_notify_going_online(vm, mb_id); + if (vm->in_sbm) + rc = virtio_mem_sbm_notify_going_online(vm, id); break; case MEM_OFFLINE: - virtio_mem_notify_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_offline(vm, id); + + atomic64_add(size, &vm->offline_size); + /* + * Trigger the workqueue. Now that we have some offline memory, + * maybe we can handle pending unplug requests. 
+ */ + if (!unplug_online) + virtio_mem_retry(vm); + vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; case MEM_ONLINE: - virtio_mem_notify_online(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_online(vm, id); + + atomic64_sub(size, &vm->offline_size); + /* + * Start adding more memory once we onlined half of our + * threshold. Don't trigger if it's possibly due to our action + * (e.g., us adding memory which gets onlined immediately from + * the core). + */ + if (!atomic_read(&vm->wq_active) && + virtio_mem_could_add_memory(vm, vm->offline_threshold / 2)) + virtio_mem_retry(vm); + vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; case MEM_CANCEL_OFFLINE: if (!vm->hotplug_active) break; - virtio_mem_notify_cancel_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_cancel_offline(vm, id); + else + virtio_mem_bbm_notify_cancel_offline(vm, id, + mhp->start_pfn, + mhp->nr_pages); vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; @@ -729,7 +1063,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, * (via generic_online_page()) using PageDirty(). */ static void virtio_mem_set_fake_offline(unsigned long pfn, - unsigned int nr_pages, bool onlined) + unsigned long nr_pages, bool onlined) { for (; nr_pages--; pfn++) { struct page *page = pfn_to_page(pfn); @@ -748,7 +1082,7 @@ static void virtio_mem_set_fake_offline(unsigned long pfn, * (via generic_online_page()), clear PageDirty(). */ static void virtio_mem_clear_fake_offline(unsigned long pfn, - unsigned int nr_pages, bool onlined) + unsigned long nr_pages, bool onlined) { for (; nr_pages--; pfn++) { struct page *page = pfn_to_page(pfn); @@ -763,16 +1097,17 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn, * Release a range of fake-offline pages to the buddy, effectively * fake-onlining them.
*/ -static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) +static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) { - const int order = MAX_ORDER - 1; - int i; + const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES; + unsigned long i; /* - * We are always called with subblock granularity, which is at least - * aligned to MAX_ORDER - 1. + * We are always called at least with MAX_ORDER_NR_PAGES + * granularity/alignment (e.g., the way subblocks work). All pages + * inside such a block are alike. */ - for (i = 0; i < nr_pages; i += 1 << order) { + for (i = 0; i < nr_pages; i += max_nr_pages) { struct page *page = pfn_to_page(pfn + i); /* @@ -782,42 +1117,128 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) * alike. */ if (PageDirty(page)) { - virtio_mem_clear_fake_offline(pfn + i, 1 << order, + virtio_mem_clear_fake_offline(pfn + i, max_nr_pages, false); - generic_online_page(page, order); + generic_online_page(page, MAX_ORDER - 1); } else { - virtio_mem_clear_fake_offline(pfn + i, 1 << order, + virtio_mem_clear_fake_offline(pfn + i, max_nr_pages, true); - free_contig_range(pfn + i, 1 << order); - adjust_managed_page_count(page, 1 << order); + free_contig_range(pfn + i, max_nr_pages); + adjust_managed_page_count(page, max_nr_pages); } } } +/* + * Try to allocate a range, marking pages fake-offline, effectively + * fake-offlining them. + */ +static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) +{ + const bool is_movable = zone_idx(page_zone(pfn_to_page(pfn))) == + ZONE_MOVABLE; + int rc, retry_count; + + /* + * TODO: We want an alloc_contig_range() mode that tries to allocate + * harder (e.g., dealing with temporarily pinned pages, PCP), especially + * with ZONE_MOVABLE. So for now, retry a couple of times with + * ZONE_MOVABLE before giving up - because that zone is supposed to give + * some guarantees. 
+ */ + for (retry_count = 0; retry_count < 5; retry_count++) { + rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, + GFP_KERNEL); + if (rc == -ENOMEM) + /* whoops, out of memory */ + return rc; + else if (rc && !is_movable) + break; + else if (rc) + continue; + + virtio_mem_set_fake_offline(pfn, nr_pages, true); + adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); + return 0; + } + + return -EBUSY; +} + +/* + * Handle fake-offline pages when memory is going offline - such that the + * pages can be skipped by mm-core when offlining. + */ +static void virtio_mem_fake_offline_going_offline(unsigned long pfn, + unsigned long nr_pages) +{ + struct page *page; + unsigned long i; + + /* + * Drop our reference to the pages so the memory can get offlined + * and add the unplugged pages to the managed page counters (so + * offlining code can correctly subtract them again). + */ + adjust_managed_page_count(pfn_to_page(pfn), nr_pages); + /* Drop our reference to the pages so the memory can get offlined. */ + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(pfn + i); + if (WARN_ON(!page_ref_dec_and_test(page))) + dump_page(page, "fake-offline page referenced"); + } +} + +/* + * Handle fake-offline pages when memory offlining is canceled - to undo + * what we did in virtio_mem_fake_offline_going_offline(). + */ +static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, + unsigned long nr_pages) +{ + unsigned long i; + + /* + * Get the reference we dropped when going offline and subtract the + * unplugged pages from the managed page counters. 
+ */ + adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); + for (i = 0; i < nr_pages; i++) + page_ref_inc(pfn_to_page(pfn + i)); +} + static void virtio_mem_online_page_cb(struct page *page, unsigned int order) { const unsigned long addr = page_to_phys(page); - const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); + unsigned long id, sb_id; struct virtio_mem *vm; - int sb_id; + bool do_online; - /* - * We exploit here that subblocks have at least MAX_ORDER - 1 - * size/alignment and that this callback is is called with such a - * size/alignment. So we cannot cross subblocks and therefore - * also not memory blocks. - */ rcu_read_lock(); list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { - if (!virtio_mem_owned_mb(vm, mb_id)) + if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) continue; - sb_id = virtio_mem_phys_to_sb_id(vm, addr); - /* - * If plugged, online the pages, otherwise, set them fake - * offline (PageOffline). - */ - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + if (vm->in_sbm) { + /* + * We exploit here that subblocks have at least + * MAX_ORDER_NR_PAGES size/alignment - so we cannot + * cross subblocks within one call. + */ + id = virtio_mem_phys_to_mb_id(addr); + sb_id = virtio_mem_phys_to_sb_id(vm, addr); + do_online = virtio_mem_sbm_test_sb_plugged(vm, id, + sb_id, 1); + } else { + /* + * If the whole block is marked fake offline, keep + * everything that way. 
+ */ + id = virtio_mem_phys_to_bb_id(vm, addr); + do_online = virtio_mem_bbm_get_bb_state(vm, id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE; + } + if (do_online) generic_online_page(page, order); else virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, @@ -870,23 +1291,33 @@ static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr, .u.plug.addr = cpu_to_virtio64(vm->vdev, addr), .u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), }; + int rc = -ENOMEM; if (atomic_read(&vm->config_changed)) return -EAGAIN; + dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: vm->plugged_size += size; return 0; case VIRTIO_MEM_RESP_NACK: - return -EAGAIN; + rc = -EAGAIN; + break; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; case VIRTIO_MEM_RESP_ERROR: - return -EINVAL; + rc = -EINVAL; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc); + return rc; } static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, @@ -898,21 +1329,30 @@ static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, .u.unplug.addr = cpu_to_virtio64(vm->vdev, addr), .u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), }; + int rc = -ENOMEM; if (atomic_read(&vm->config_changed)) return -EAGAIN; + dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: vm->plugged_size -= size; return 0; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; case VIRTIO_MEM_RESP_ERROR: - return -EINVAL; + rc = -EINVAL; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc); + return rc; } static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) @@ -920,6 +1360,9 @@ 
static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) const struct virtio_mem_req req = { .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL), }; + int rc = -ENOMEM; + + dev_dbg(&vm->vdev->dev, "unplugging all memory"); switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: @@ -929,30 +1372,31 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) atomic_set(&vm->config_changed, 1); return 0; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc); + return rc; } /* * Plug selected subblocks. Updates the plugged state, but not the state * of the memory block. */ -static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, - int sb_id, int count) +static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id, + int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size; - const uint64_t size = count * vm->subblock_size; + sb_id * vm->sbm.sb_size; + const uint64_t size = count * vm->sbm.sb_size; int rc; - dev_dbg(&vm->vdev->dev, "plugging memory block: %lu : %i - %i\n", mb_id, - sb_id, sb_id + count - 1); - rc = virtio_mem_send_plug_request(vm, addr, size); if (!rc) - virtio_mem_mb_set_sb_plugged(vm, mb_id, sb_id, count); + virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count); return rc; } @@ -960,23 +1404,46 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, * Unplug selected subblocks. Updates the plugged state, but not the state * of the memory block. 
*/ -static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, - int sb_id, int count) +static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, + int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size; - const uint64_t size = count * vm->subblock_size; + sb_id * vm->sbm.sb_size; + const uint64_t size = count * vm->sbm.sb_size; int rc; - dev_dbg(&vm->vdev->dev, "unplugging memory block: %lu : %i - %i\n", - mb_id, sb_id, sb_id + count - 1); - rc = virtio_mem_send_unplug_request(vm, addr, size); if (!rc) - virtio_mem_mb_set_sb_unplugged(vm, mb_id, sb_id, count); + virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count); return rc; } +/* + * Request to unplug a big block. + * + * Will not modify the state of the big block. + */ +static int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_send_unplug_request(vm, addr, size); +} + +/* + * Request to plug a big block. + * + * Will not modify the state of the big block. + */ +static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_send_plug_request(vm, addr, size); +} + /* * Unplug the desired number of plugged subblocks of a offline or not-added * memory block. Will fail if any subblock cannot get unplugged (instead of @@ -986,29 +1453,29 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, * * Note: can fail after some subblocks were unplugged. 
*/ -static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, - unsigned long mb_id, uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb) { int sb_id, count; int rc; - sb_id = vm->nb_sb_per_mb - 1; + sb_id = vm->sbm.sbs_per_mb - 1; while (*nb_sb) { /* Find the next candidate subblock */ while (sb_id >= 0 && - virtio_mem_mb_test_sb_unplugged(vm, mb_id, sb_id, 1)) + virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1)) sb_id--; if (sb_id < 0) break; /* Try to unplug multiple subblocks at a time */ count = 1; while (count < *nb_sb && sb_id > 0 && - virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { + virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { count++; sb_id--; } - rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); if (rc) return rc; *nb_sb -= count; @@ -1025,63 +1492,50 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, * * Note: can fail after some subblocks were unplugged. */ -static int virtio_mem_mb_unplug(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id) { - uint64_t nb_sb = vm->nb_sb_per_mb; + uint64_t nb_sb = vm->sbm.sbs_per_mb; - return virtio_mem_mb_unplug_any_sb(vm, mb_id, &nb_sb); + return virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb); } /* * Prepare tracking data for the next memory block. */ -static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, - unsigned long *mb_id) +static int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm, + unsigned long *mb_id) { int rc; - if (vm->next_mb_id > vm->last_usable_mb_id) + if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id) return -ENOSPC; /* Resize the state array if required. */ - rc = virtio_mem_mb_state_prepare_next_mb(vm); + rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm); if (rc) return rc; /* Resize the subblock bitmap if required. 
*/ - rc = virtio_mem_sb_bitmap_prepare_next_mb(vm); + rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm); if (rc) return rc; - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_UNUSED]++; - *mb_id = vm->next_mb_id++; + vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++; + *mb_id = vm->sbm.next_mb_id++; return 0; } -/* - * Don't add too many blocks that are not onlined yet to avoid running OOM. - */ -static bool virtio_mem_too_many_mb_offline(struct virtio_mem *vm) -{ - unsigned long nb_offline; - - nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] + - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL]; - return nb_offline >= VIRTIO_MEM_NB_OFFLINE_THRESHOLD; -} - /* * Try to plug the desired number of subblocks and add the memory block * to Linux. * * Will modify the state of the memory block. */ -static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb) { - const int count = min_t(int, *nb_sb, vm->nb_sb_per_mb); - int rc, rc2; + const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb); + int rc; if (WARN_ON_ONCE(!count)) return -EINVAL; @@ -1090,7 +1544,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * Plug the requested number of subblocks before adding it to linux, * so that onlining will directly online all plugged subblocks. */ - rc = virtio_mem_mb_plug_sb(vm, mb_id, 0, count); + rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count); if (rc) return rc; @@ -1098,29 +1552,21 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * Mark the block properly offline before adding it to Linux, * so the memory notifiers will find the block in the right state. 
*/ - if (count == vm->nb_sb_per_mb) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + if (count == vm->sbm.sbs_per_mb) + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); else - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); /* Add the memory block to linux - if that fails, try to unplug. */ - rc = virtio_mem_mb_add(vm, mb_id); + rc = virtio_mem_sbm_add_mb(vm, mb_id); if (rc) { - enum virtio_mem_mb_state new_state = VIRTIO_MEM_MB_STATE_UNUSED; + int new_state = VIRTIO_MEM_SBM_MB_UNUSED; - dev_err(&vm->vdev->dev, - "adding memory block %lu failed with %d\n", mb_id, rc); - rc2 = virtio_mem_mb_unplug_sb(vm, mb_id, 0, count); - - /* - * TODO: Linux MM does not properly clean up yet in all cases - * where adding of memory failed - especially on -ENOMEM. - */ - if (rc2) - new_state = VIRTIO_MEM_MB_STATE_PLUGGED; - virtio_mem_mb_set_state(vm, mb_id, new_state); + if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count)) + new_state = VIRTIO_MEM_SBM_MB_PLUGGED; + virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); return rc; } @@ -1136,8 +1582,9 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * * Note: Can fail after some subblocks were successfully plugged. 
*/ -static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, - uint64_t *nb_sb, bool online) +static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb, + bool online) { unsigned long pfn, nr_pages; int sb_id, count; @@ -1147,17 +1594,16 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, return -EINVAL; while (*nb_sb) { - sb_id = virtio_mem_mb_first_unplugged_sb(vm, mb_id); - if (sb_id >= vm->nb_sb_per_mb) + sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id); + if (sb_id >= vm->sbm.sbs_per_mb) break; count = 1; while (count < *nb_sb && - sb_id + count < vm->nb_sb_per_mb && - !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id + count, - 1)) + sb_id + count < vm->sbm.sbs_per_mb && + !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) count++; - rc = virtio_mem_mb_plug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count); if (rc) return rc; *nb_sb -= count; @@ -1166,29 +1612,26 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, /* fake-online the pages if the memory block is online */ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); - nr_pages = PFN_DOWN(count * vm->subblock_size); + sb_id * vm->sbm.sb_size); + nr_pages = PFN_DOWN(count * vm->sbm.sb_size); virtio_mem_fake_online(pfn, nr_pages); } - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { if (online) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE); else - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); } return 0; } -/* - * Try to plug the requested amount of memory. 
- */ -static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) +static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff) { - uint64_t nb_sb = diff / vm->subblock_size; + uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; int rc; @@ -1199,18 +1642,18 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) mutex_lock(&vm->hotplug_mutex); /* Try to plug subblocks of partially plugged online blocks. */ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) { - rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, true); + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { + rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, true); if (rc || !nb_sb) goto out_unlock; cond_resched(); } /* Try to plug subblocks of partially plugged offline blocks. */ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, false); + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { + rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, false); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1223,11 +1666,11 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) mutex_unlock(&vm->hotplug_mutex); /* Try to plug and add unused blocks */ - virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) { - if (virtio_mem_too_many_mb_offline(vm)) + virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) { + if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; - rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb); + rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); if (rc || !nb_sb) return rc; cond_resched(); @@ -1235,13 +1678,13 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to prepare, plug and add new blocks */ while (nb_sb) { - if (virtio_mem_too_many_mb_offline(vm)) + if 
(!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; - rc = virtio_mem_prepare_next_mb(vm, &mb_id); + rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id); if (rc) return rc; - rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb); + rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); if (rc) return rc; cond_resched(); @@ -1253,6 +1696,112 @@ out_unlock: return rc; } +/* + * Plug a big block and add it to Linux. + * + * Will modify the state of the big block. + */ +static int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_UNUSED)) + return -EINVAL; + + rc = virtio_mem_bbm_plug_bb(vm, bb_id); + if (rc) + return rc; + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); + + rc = virtio_mem_bbm_add_bb(vm, bb_id); + if (rc) { + if (!virtio_mem_bbm_unplug_bb(vm, bb_id)) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + else + /* Retry from the main loop. */ + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + return rc; + } + return 0; +} + +/* + * Prepare tracking data for the next big block. + */ +static int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm, + unsigned long *bb_id) +{ + int rc; + + if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id) + return -ENOSPC; + + /* Resize the big block state array if required. 
*/ + rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm); + if (rc) + return rc; + + vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++; + *bb_id = vm->bbm.next_bb_id; + vm->bbm.next_bb_id++; + return 0; +} + +static int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff) +{ + uint64_t nb_bb = diff / vm->bbm.bb_size; + unsigned long bb_id; + int rc; + + if (!nb_bb) + return 0; + + /* Try to plug and add unused big blocks */ + virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) { + if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) + return -ENOSPC; + + rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + cond_resched(); + } + + /* Try to prepare, plug and add new big blocks */ + while (nb_bb) { + if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) + return -ENOSPC; + + rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id); + if (rc) + return rc; + rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); + if (!rc) + nb_bb--; + if (rc) + return rc; + cond_resched(); + } + + return 0; +} + +/* + * Try to plug the requested amount of memory. + */ +static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) +{ + if (vm->in_sbm) + return virtio_mem_sbm_plug_request(vm, diff); + return virtio_mem_bbm_plug_request(vm, diff); +} + /* * Unplug the desired number of plugged subblocks of an offline memory block. * Will fail if any subblock cannot get unplugged (instead of skipping it). @@ -1262,33 +1811,33 @@ out_unlock: * * Note: Can fail after some subblocks were successfully unplugged. 
*/ -static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, + unsigned long mb_id, + uint64_t *nb_sb) { int rc; - rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb); + rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, nb_sb); /* some subblocks might have been unplugged even on failure */ - if (!virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); if (rc) return rc; - if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { /* * Remove the block from Linux - this should never fail. * Hinder the block from getting onlined by marking it * unplugged. Temporarily drop the mutex, so * any pending GOING_ONLINE requests can be serviced/rejected. */ - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); mutex_unlock(&vm->hotplug_mutex); - rc = virtio_mem_mb_remove(vm, mb_id); + rc = virtio_mem_sbm_remove_mb(vm, mb_id); BUG_ON(rc); mutex_lock(&vm->hotplug_mutex); } @@ -1300,38 +1849,31 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, * * Will modify the state of the memory block. 
*/ -static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size) * count; + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; unsigned long start_pfn; int rc; start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); - rc = alloc_contig_range(start_pfn, start_pfn + nr_pages, - MIGRATE_MOVABLE, GFP_KERNEL); - if (rc == -ENOMEM) - /* whoops, out of memory */ - return rc; - if (rc) - return -EBUSY; + sb_id * vm->sbm.sb_size); - /* Mark it as fake-offline before unplugging it */ - virtio_mem_set_fake_offline(start_pfn, nr_pages, true); - adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages); + rc = virtio_mem_fake_offline(start_pfn, nr_pages); + if (rc) + return rc; /* Try to unplug the allocated memory */ - rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); if (rc) { /* Return the memory to the buddy. */ virtio_mem_fake_online(start_pfn, nr_pages); return rc; } - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL); return 0; } @@ -1345,34 +1887,34 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, * Note: Can fail after some subblocks were successfully unplugged. Can * return 0 even if subblocks were busy and could not get unplugged. */ -static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, + unsigned long mb_id, + uint64_t *nb_sb) { int rc, sb_id; /* If possible, try to unplug the complete block in one shot. 
*/ - if (*nb_sb >= vm->nb_sb_per_mb && - virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { - rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0, - vm->nb_sb_per_mb); + if (*nb_sb >= vm->sbm.sbs_per_mb && + virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { + rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0, + vm->sbm.sbs_per_mb); if (!rc) { - *nb_sb -= vm->nb_sb_per_mb; + *nb_sb -= vm->sbm.sbs_per_mb; goto unplugged; } else if (rc != -EBUSY) return rc; } /* Fallback to single subblocks. */ - for (sb_id = vm->nb_sb_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { + for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { /* Find the next candidate subblock */ while (sb_id >= 0 && - !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) sb_id--; if (sb_id < 0) break; - rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, sb_id, 1); + rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1); if (rc == -EBUSY) continue; else if (rc) @@ -1386,24 +1928,21 @@ unplugged: * remove it. This will usually not fail, as no memory is in use * anymore - however some other notifiers might NACK the request. */ - if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { mutex_unlock(&vm->hotplug_mutex); - rc = virtio_mem_mb_offline_and_remove(vm, mb_id); + rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); mutex_lock(&vm->hotplug_mutex); if (!rc) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); } return 0; } -/* - * Try to unplug the requested amount of memory. 
- */ -static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) +static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff) { - uint64_t nb_sb = diff / vm->subblock_size; + uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; int rc; @@ -1418,20 +1957,17 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) mutex_lock(&vm->hotplug_mutex); /* Try to unplug subblocks of partially plugged offline blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, - &nb_sb); + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { + rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; cond_resched(); } /* Try to unplug subblocks of plugged offline blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE) { - rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, - &nb_sb); + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) { + rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1443,10 +1979,9 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to unplug subblocks of partially plugged online blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) { - rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, - &nb_sb); + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { + rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; mutex_unlock(&vm->hotplug_mutex); @@ -1455,10 +1990,8 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to unplug subblocks of plugged online blocks. 
*/ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE) { - rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, - &nb_sb); + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) { + rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; mutex_unlock(&vm->hotplug_mutex); @@ -1473,20 +2006,212 @@ out_unlock: return rc; } +/* + * Try to offline and remove a big block from Linux and unplug it. Will fail + * with -EBUSY if some memory is busy and cannot get unplugged. + * + * Will modify the state of the memory block. Might temporarily drop the + * hotplug_mutex. + */ +static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); + const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); + unsigned long end_pfn = start_pfn + nr_pages; + unsigned long pfn; + struct page *page; + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_ADDED)) + return -EINVAL; + + if (bbm_safe_unplug) { + /* + * Start by fake-offlining all memory. Once we marked the device + * block as fake-offline, all newly onlined memory will + * automatically be kept fake-offline. Protect from concurrent + * onlining/offlining until we have a consistent state. 
+ */ + mutex_lock(&vm->hotplug_mutex); + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; + + rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); + if (rc) { + end_pfn = pfn; + goto rollback_safe_unplug; + } + } + mutex_unlock(&vm->hotplug_mutex); + } + + rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); + if (rc) { + if (bbm_safe_unplug) { + mutex_lock(&vm->hotplug_mutex); + goto rollback_safe_unplug; + } + return rc; + } + + rc = virtio_mem_bbm_unplug_bb(vm, bb_id); + if (rc) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + else + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + return rc; + +rollback_safe_unplug: + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; + virtio_mem_fake_online(pfn, PAGES_PER_SECTION); + } + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); + mutex_unlock(&vm->hotplug_mutex); + return rc; +} + +/* + * Try to remove a big block from Linux and unplug it. Will fail with + * -EBUSY if some memory is online. + * + * Will modify the state of the memory block. + */ +static int virtio_mem_bbm_remove_and_unplug_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_ADDED)) + return -EINVAL; + + rc = virtio_mem_bbm_remove_bb(vm, bb_id); + if (rc) + return -EBUSY; + + rc = virtio_mem_bbm_unplug_bb(vm, bb_id); + if (rc) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + else + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + return rc; +} + +/* + * Test if a big block is completely offline. 
+ */ +static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm, + unsigned long bb_id) +{ + const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); + const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); + unsigned long pfn; + + for (pfn = start_pfn; pfn < start_pfn + nr_pages; + pfn += PAGES_PER_SECTION) { + if (pfn_to_online_page(pfn)) + return false; + } + + return true; +} + +static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff) +{ + uint64_t nb_bb = diff / vm->bbm.bb_size; + uint64_t bb_id; + int rc; + + if (!nb_bb) + return 0; + + /* Try to unplug completely offline big blocks first. */ + virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { + cond_resched(); + /* + * As we're holding no locks, this check is racy as memory + * can get onlined in the meantime - but we'll fail gracefully. + */ + if (!virtio_mem_bbm_bb_is_offline(vm, bb_id)) + continue; + rc = virtio_mem_bbm_remove_and_unplug_bb(vm, bb_id); + if (rc == -EBUSY) + continue; + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + } + + if (!unplug_online) + return 0; + + /* Try to unplug any big blocks. */ + virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { + cond_resched(); + rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id); + if (rc == -EBUSY) + continue; + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + } + + return nb_bb ? -EBUSY : 0; +} + +/* + * Try to unplug the requested amount of memory. + */ +static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) +{ + if (vm->in_sbm) + return virtio_mem_sbm_unplug_request(vm, diff); + return virtio_mem_bbm_unplug_request(vm, diff); +} + /* * Try to unplug all blocks that couldn't be unplugged before, for example, * because the hypervisor was busy. 
*/ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) { - unsigned long mb_id; + unsigned long id; int rc; - virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_PLUGGED) { - rc = virtio_mem_mb_unplug(vm, mb_id); + if (!vm->in_sbm) { + virtio_mem_bbm_for_each_bb(vm, id, + VIRTIO_MEM_BBM_BB_PLUGGED) { + rc = virtio_mem_bbm_unplug_bb(vm, id); + if (rc) + return rc; + virtio_mem_bbm_set_bb_state(vm, id, + VIRTIO_MEM_BBM_BB_UNUSED); + } + return 0; + } + + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) { + rc = virtio_mem_sbm_unplug_mb(vm, id); if (rc) return rc; - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, id, + VIRTIO_MEM_SBM_MB_UNUSED); } return 0; @@ -1511,7 +2236,13 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm) usable_region_size, &usable_region_size); end_addr = vm->addr + usable_region_size; end_addr = min(end_addr, phys_limit); - vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; + + if (vm->in_sbm) + vm->sbm.last_usable_mb_id = + virtio_mem_phys_to_mb_id(end_addr) - 1; + else + vm->bbm.last_usable_bb_id = + virtio_mem_phys_to_bb_id(vm, end_addr) - 1; /* see if there is a request to change the size */ virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, @@ -1535,6 +2266,7 @@ static void virtio_mem_run_wq(struct work_struct *work) if (vm->broken) return; + atomic_set(&vm->wq_active, 1); retry: rc = 0; @@ -1595,6 +2327,8 @@ retry: "unknown error, marking device broken: %d\n", rc); vm->broken = true; } + + atomic_set(&vm->wq_active, 0); } static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer) @@ -1631,6 +2365,7 @@ static int virtio_mem_init_vq(struct virtio_mem *vm) static int virtio_mem_init(struct virtio_mem *vm) { const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS; + uint64_t sb_size, addr; uint16_t node_id; if (!vm->vdev->config->get) { @@ -1659,15 +2394,9 @@ static int virtio_mem_init(struct virtio_mem 
*vm) virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, &vm->region_size); - /* - * We always hotplug memory in memory block granularity. This way, - * we have to wait for exactly one memory block to online. - */ - if (vm->device_block_size > memory_block_size_bytes()) { - dev_err(&vm->vdev->dev, - "The block size is not supported (too big).\n"); - return -EINVAL; - } + /* Determine the nid for the device based on the lowest address. */ + if (vm->nid == NUMA_NO_NODE) + vm->nid = memory_add_physaddr_to_nid(vm->addr); /* bad device setup - warn only */ if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) @@ -1681,23 +2410,57 @@ static int virtio_mem_init(struct virtio_mem *vm) "Some memory is not addressable. This can make some memory unusable.\n"); /* - * Calculate the subblock size: - * - At least MAX_ORDER - 1 / pageblock_order. - * - At least the device block size. - * In the worst case, a single subblock per memory block. + * We want subblocks to span at least MAX_ORDER_NR_PAGES and + * pageblock_nr_pages pages. This: + * - Simplifies our page onlining code (virtio_mem_online_page_cb) + * and fake page onlining code (virtio_mem_fake_online). + * - Is required for now for alloc_contig_range() to work reliably - + * it doesn't properly handle smaller granularity on ZONE_NORMAL. 
*/ - vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1, - pageblock_order); - vm->subblock_size = max_t(uint64_t, vm->device_block_size, - vm->subblock_size); - vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size; + sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) * PAGE_SIZE; + sb_size = max_t(uint64_t, vm->device_block_size, sb_size); - /* Round up to the next full memory block */ - vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + - memory_block_size_bytes()); - vm->next_mb_id = vm->first_mb_id; - vm->last_mb_id = virtio_mem_phys_to_mb_id(vm->addr + - vm->region_size) - 1; + if (sb_size < memory_block_size_bytes() && !force_bbm) { + /* SBM: At least two subblocks per Linux memory block. */ + vm->in_sbm = true; + vm->sbm.sb_size = sb_size; + vm->sbm.sbs_per_mb = memory_block_size_bytes() / + vm->sbm.sb_size; + + /* Round up to the next full memory block */ + addr = vm->addr + memory_block_size_bytes() - 1; + vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr); + vm->sbm.next_mb_id = vm->sbm.first_mb_id; + } else { + /* BBM: At least one Linux memory block. */ + vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size, + memory_block_size_bytes()); + + if (bbm_block_size) { + if (!is_power_of_2(bbm_block_size)) { + dev_warn(&vm->vdev->dev, + "bbm_block_size is not a power of 2"); + } else if (bbm_block_size < vm->bbm.bb_size) { + dev_warn(&vm->vdev->dev, + "bbm_block_size is too small"); + } else { + vm->bbm.bb_size = bbm_block_size; + } + } + + /* Round up to the next aligned big block */ + addr = vm->addr + vm->bbm.bb_size - 1; + vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr); + vm->bbm.next_bb_id = vm->bbm.first_bb_id; + } + + /* Prepare the offline threshold - make sure we can add two blocks. */ + vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), + VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD); + /* In BBM, we also want at least two big blocks. 
*/ + vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size, + vm->offline_threshold); dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); @@ -1705,9 +2468,13 @@ static int virtio_mem_init(struct virtio_mem *vm) (unsigned long long)vm->device_block_size); dev_info(&vm->vdev->dev, "memory block size: 0x%lx", memory_block_size_bytes()); - dev_info(&vm->vdev->dev, "subblock size: 0x%llx", - (unsigned long long)vm->subblock_size); - if (vm->nid != NUMA_NO_NODE) + if (vm->in_sbm) + dev_info(&vm->vdev->dev, "subblock size: 0x%llx", + (unsigned long long)vm->sbm.sb_size); + else + dev_info(&vm->vdev->dev, "big block size: 0x%llx", + (unsigned long long)vm->bbm.bb_size); + if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); return 0; @@ -1753,6 +2520,20 @@ static void virtio_mem_delete_resource(struct virtio_mem *vm) vm->parent_resource = NULL; } +static int virtio_mem_range_has_system_ram(struct resource *res, void *arg) +{ + return 1; +} + +static bool virtio_mem_has_memory_added(struct virtio_mem *vm) +{ + const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr, + vm->addr + vm->region_size, NULL, + virtio_mem_range_has_system_ram) == 1; +} + static int virtio_mem_probe(struct virtio_device *vdev) { struct virtio_mem *vm; @@ -1849,21 +2630,24 @@ static void virtio_mem_remove(struct virtio_device *vdev) cancel_work_sync(&vm->wq); hrtimer_cancel(&vm->retry_timer); - /* - * After we unregistered our callbacks, user space can online partially - * plugged offline blocks. Make sure to remove them. 
- */ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_remove(vm, mb_id); - BUG_ON(rc); - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED); + if (vm->in_sbm) { + /* + * After we unregistered our callbacks, user space can online + * partially plugged offline blocks. Make sure to remove them. + */ + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { + rc = virtio_mem_sbm_remove_mb(vm, mb_id); + BUG_ON(rc); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); + } + /* + * After we unregistered our callbacks, user space can no longer + * offline partially plugged online memory blocks. No need to + * worry about them. + */ } - /* - * After we unregistered our callbacks, user space can no longer - * offline partially plugged online memory blocks. No need to worry - * about them. - */ /* unregister callbacks */ unregister_virtio_mem_device(vm); @@ -1874,10 +2658,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) * the system. And there is no way to stop the driver/device from going * away. Warn at least. 
*/ - if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) { + if (virtio_mem_has_memory_added(vm)) { dev_warn(&vdev->dev, "device still has system memory added\n"); } else { virtio_mem_delete_resource(vm); @@ -1885,8 +2666,12 @@ static void virtio_mem_remove(struct virtio_device *vdev) } /* remove all tracking data - no locking needed */ - vfree(vm->mb_state); - vfree(vm->sb_bitmap); + if (vm->in_sbm) { + vfree(vm->sbm.mb_states); + vfree(vm->sbm.sb_states); + } else { + vfree(vm->bbm.bb_states); + } /* reset the device and cleanup the queues */ vdev->config->reset(vdev); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index becc77697960..71e16b53e9c1 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1608,7 +1608,6 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->num_added = 0; vq->packed_ring = true; vq->use_dma_api = vring_use_dma_api(vdev); - list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; @@ -1669,6 +1668,7 @@ static struct virtqueue *vring_create_virtqueue_packed( cpu_to_le16(vq->packed.event_flags_shadow); } + list_add_tail(&vq->vq.list, &vdev->vqs); return &vq->vq; err_desc_extra: @@ -1676,9 +1676,9 @@ err_desc_extra: err_desc_state: kfree(vq); err_vq: - vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); + vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); err_device: - vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); + vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); err_driver: vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); err_ring: @@ -2085,7 +2085,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->last_used_idx = 0; vq->num_added = 0; 
vq->use_dma_api = vring_use_dma_api(vdev); - list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; @@ -2127,6 +2126,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, memset(vq->split.desc_state, 0, vring.num * sizeof(struct vring_desc_state_split)); + list_add_tail(&vq->vq.list, &vdev->vqs); return &vq->vq; } EXPORT_SYMBOL_GPL(__vring_new_virtqueue); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index f22e37337030..7ff941e71b79 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -386,6 +386,7 @@ config ARM_SBSA_WATCHDOG config ARMADA_37XX_WATCHDOG tristate "Armada 37xx watchdog" depends on ARCH_MVEBU || COMPILE_TEST + depends on HAS_IOMEM select MFD_SYSCON select WATCHDOG_CORE help @@ -631,7 +632,7 @@ config SUNXI_WATCHDOG config COH901327_WATCHDOG bool "ST-Ericsson COH 901 327 watchdog" - depends on ARCH_U300 || (ARM && COMPILE_TEST) + depends on ARCH_U300 || (ARM && COMMON_CLK && COMPILE_TEST) default y if MACH_U300 select WATCHDOG_CORE help @@ -789,6 +790,7 @@ config MOXART_WDT config SIRFSOC_WATCHDOG tristate "SiRFSOC watchdog" + depends on HAS_IOMEM depends on ARCH_SIRF || COMPILE_TEST select WATCHDOG_CORE default y @@ -1696,16 +1698,6 @@ config WDT_MTX1 Hardware driver for the MTX-1 boards. This is a watchdog timer that will reboot the machine after a 100 seconds timer expired. -config PNX833X_WDT - tristate "PNX833x Hardware Watchdog" - depends on SOC_PNX8335 - depends on BROKEN - help - Hardware driver for the PNX833x's watchdog. This is a - watchdog timer that will reboot the machine after a programmable - timer has expired and no process has written to /dev/watchdog during - that time. 
- config SIBYTE_WDOG tristate "Sibyte SoC hardware watchdog" depends on CPU_SB1 || (MIPS && COMPILE_TEST) diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 071a2e50be98..5c74ee19d441 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -161,7 +161,6 @@ obj-$(CONFIG_RC32434_WDT) += rc32434_wdt.o obj-$(CONFIG_INDYDOG) += indydog.o obj-$(CONFIG_JZ4740_WDT) += jz4740_wdt.o obj-$(CONFIG_WDT_MTX1) += mtx-1_wdt.o -obj-$(CONFIG_PNX833X_WDT) += pnx833x_wdt.o obj-$(CONFIG_SIBYTE_WDOG) += sb_wdog.o obj-$(CONFIG_AR7_WDT) += ar7_wdt.o obj-$(CONFIG_TXX9_WDT) += txx9wdt.o diff --git a/drivers/watchdog/geodewdt.c b/drivers/watchdog/geodewdt.c index 83418924e30a..0b699c783d57 100644 --- a/drivers/watchdog/geodewdt.c +++ b/drivers/watchdog/geodewdt.c @@ -150,8 +150,6 @@ static long geodewdt_ioctl(struct file *file, unsigned int cmd, case WDIOC_GETSUPPORT: return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; - break; - case WDIOC_GETSTATUS: case WDIOC_GETBOOTSTATUS: return put_user(0, p); diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 7d34bcf1c45b..cbd1498ff015 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -21,8 +21,9 @@ #include #include #include +#include -#define HPWDT_VERSION "2.0.3" +#define HPWDT_VERSION "2.0.4" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) #define TICKS_TO_SECS(ticks) ((ticks) * 128 / 1000) #define HPWDT_MAX_TICKS 65535 @@ -334,6 +335,11 @@ static int hpwdt_init_one(struct pci_dev *dev, watchdog_set_nowayout(&hpwdt_dev, nowayout); watchdog_init_timeout(&hpwdt_dev, soft_margin, NULL); + if (is_kdump_kernel()) { + pretimeout = 0; + kdumptimeout = 0; + } + if (pretimeout && hpwdt_dev.timeout <= PRETIMEOUT_SEC) { dev_warn(&dev->dev, "timeout <= pretimeout. 
Setting pretimeout to zero\n"); pretimeout = 0; diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index a370a185a41c..bf31d7b67a69 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -40,8 +40,6 @@ * Includes, defines, variables, module parameters, ... */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - /* Module and version information */ #define DRV_NAME "iTCO_wdt" #define DRV_VERSION "1.11" @@ -279,7 +277,7 @@ static int iTCO_wdt_start(struct watchdog_device *wd_dev) /* disable chipset's NO_REBOOT bit */ if (p->update_no_reboot_bit(p->no_reboot_priv, false)) { spin_unlock(&p->io_lock); - pr_err("failed to reset NO_REBOOT flag, reboot disabled by hardware/BIOS\n"); + dev_err(wd_dev->parent, "failed to reset NO_REBOOT flag, reboot disabled by hardware/BIOS\n"); return -EIO; } @@ -510,7 +508,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) /* Check chipset's NO_REBOOT bit */ if (p->update_no_reboot_bit(p->no_reboot_priv, false) && iTCO_vendor_check_noreboot_on()) { - pr_info("unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n"); + dev_info(dev, "unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n"); return -ENODEV; /* Cannot reset NO_REBOOT bit */ } @@ -530,12 +528,12 @@ static int iTCO_wdt_probe(struct platform_device *pdev) if (!devm_request_region(dev, p->tco_res->start, resource_size(p->tco_res), pdev->name)) { - pr_err("I/O address 0x%04llx already in use, device disabled\n", + dev_err(dev, "I/O address 0x%04llx already in use, device disabled\n", (u64)TCOBASE(p)); return -EBUSY; } - pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n", + dev_info(dev, "Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n", pdata->name, pdata->version, (u64)TCOBASE(p)); /* Clear out the (probably old) status */ @@ -558,7 +556,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) break; } - p->wddev.info = &ident, + p->wddev.info = &ident, p->wddev.ops = &iTCO_wdt_ops, 
p->wddev.bootstatus = 0; p->wddev.timeout = WATCHDOG_TIMEOUT; @@ -575,7 +573,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) if not reset to the default */ if (iTCO_wdt_set_timeout(&p->wddev, heartbeat)) { iTCO_wdt_set_timeout(&p->wddev, WATCHDOG_TIMEOUT); - pr_info("timeout value out of range, using %d\n", + dev_info(dev, "timeout value out of range, using %d\n", WATCHDOG_TIMEOUT); } @@ -583,11 +581,11 @@ static int iTCO_wdt_probe(struct platform_device *pdev) watchdog_stop_on_unregister(&p->wddev); ret = devm_watchdog_register_device(dev, &p->wddev); if (ret != 0) { - pr_err("cannot register watchdog device (err=%d)\n", ret); + dev_err(dev, "cannot register watchdog device (err=%d)\n", ret); return ret; } - pr_info("initialized. heartbeat=%d sec (nowayout=%d)\n", + dev_info(dev, "initialized. heartbeat=%d sec (nowayout=%d)\n", heartbeat, nowayout); return 0; @@ -651,21 +649,7 @@ static struct platform_driver iTCO_wdt_driver = { }, }; -static int __init iTCO_wdt_init_module(void) -{ - pr_info("Intel TCO WatchDog Timer Driver v%s\n", DRV_VERSION); - - return platform_driver_register(&iTCO_wdt_driver); -} - -static void __exit iTCO_wdt_cleanup_module(void) -{ - platform_driver_unregister(&iTCO_wdt_driver); - pr_info("Watchdog Module Unloaded\n"); -} - -module_init(iTCO_wdt_init_module); -module_exit(iTCO_wdt_cleanup_module); +module_platform_driver(iTCO_wdt_driver); MODULE_AUTHOR("Wim Van Sebroeck "); MODULE_DESCRIPTION("Intel TCO WatchDog Timer Driver"); diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 3fc457bc16db..2f7ded32e878 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -175,8 +175,8 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev) spin_lock_init(&ddata->lock); - ddata->wdd.info = &mpc8xxx_wdt_info, - ddata->wdd.ops = &mpc8xxx_wdt_ops, + ddata->wdd.info = &mpc8xxx_wdt_info; + ddata->wdd.ops = &mpc8xxx_wdt_ops; ddata->wdd.timeout = WATCHDOG_TIMEOUT; 
watchdog_init_timeout(&ddata->wdd, timeout, dev); diff --git a/drivers/watchdog/pnx833x_wdt.c b/drivers/watchdog/pnx833x_wdt.c deleted file mode 100644 index 4097d076aab8..000000000000 --- a/drivers/watchdog/pnx833x_wdt.c +++ /dev/null @@ -1,277 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * PNX833x Hardware Watchdog Driver - * Copyright 2008 NXP Semiconductors - * Daniel Laird - * Andre McCurdy - * - * Heavily based upon - IndyDog 0.3 - * A Hardware Watchdog Device for SGI IP22 - * - * (c) Copyright 2002 Guido Guenther , All Rights Reserved. - * - * based on softdog.c by Alan Cox - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define WATCHDOG_TIMEOUT 30 /* 30 sec Maximum timeout */ -#define WATCHDOG_COUNT_FREQUENCY 68000000U /* Watchdog counts at 68MHZ. */ -#define PNX_WATCHDOG_TIMEOUT (WATCHDOG_TIMEOUT * WATCHDOG_COUNT_FREQUENCY) -#define PNX_TIMEOUT_VALUE 2040000000U - -/** CONFIG block */ -#define PNX833X_CONFIG (0x07000U) -#define PNX833X_CONFIG_CPU_WATCHDOG (0x54) -#define PNX833X_CONFIG_CPU_WATCHDOG_COMPARE (0x58) -#define PNX833X_CONFIG_CPU_COUNTERS_CONTROL (0x1c) - -/** RESET block */ -#define PNX833X_RESET (0x08000U) -#define PNX833X_RESET_CONFIG (0x08) - -static int pnx833x_wdt_alive; - -/* Set default timeout in MHZ.*/ -static int pnx833x_wdt_timeout = PNX_WATCHDOG_TIMEOUT; -module_param(pnx833x_wdt_timeout, int, 0); -MODULE_PARM_DESC(timeout, "Watchdog timeout in Mhz. 
(68Mhz clock), default=" - __MODULE_STRING(PNX_TIMEOUT_VALUE) "(30 seconds)."); - -static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" - __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); - -#define START_DEFAULT 1 -static int start_enabled = START_DEFAULT; -module_param(start_enabled, int, 0); -MODULE_PARM_DESC(start_enabled, "Watchdog is started on module insertion " - "(default=" __MODULE_STRING(START_DEFAULT) ")"); - -static void pnx833x_wdt_start(void) -{ - /* Enable watchdog causing reset. */ - PNX833X_REG(PNX833X_RESET + PNX833X_RESET_CONFIG) |= 0x1; - /* Set timeout.*/ - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = pnx833x_wdt_timeout; - /* Enable watchdog. */ - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_COUNTERS_CONTROL) |= 0x1; - - pr_info("Started watchdog timer\n"); -} - -static void pnx833x_wdt_stop(void) -{ - /* Disable watchdog causing reset. */ - PNX833X_REG(PNX833X_RESET + PNX833X_CONFIG) &= 0xFFFFFFFE; - /* Disable watchdog.*/ - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_COUNTERS_CONTROL) &= 0xFFFFFFFE; - - pr_info("Stopped watchdog timer\n"); -} - -static void pnx833x_wdt_ping(void) -{ - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = pnx833x_wdt_timeout; -} - -/* - * Allow only one person to hold it open - */ -static int pnx833x_wdt_open(struct inode *inode, struct file *file) -{ - if (test_and_set_bit(0, &pnx833x_wdt_alive)) - return -EBUSY; - - if (nowayout) - __module_get(THIS_MODULE); - - /* Activate timer */ - if (!start_enabled) - pnx833x_wdt_start(); - - pnx833x_wdt_ping(); - - pr_info("Started watchdog timer\n"); - - return stream_open(inode, file); -} - -static int pnx833x_wdt_release(struct inode *inode, struct file *file) -{ - /* Shut off the timer. 
- * Lock it in if it's a module and we defined ...NOWAYOUT */ - if (!nowayout) - pnx833x_wdt_stop(); /* Turn the WDT off */ - - clear_bit(0, &pnx833x_wdt_alive); - return 0; -} - -static ssize_t pnx833x_wdt_write(struct file *file, const char *data, size_t len, loff_t *ppos) -{ - /* Refresh the timer. */ - if (len) - pnx833x_wdt_ping(); - - return len; -} - -static long pnx833x_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int options, new_timeout = 0; - uint32_t timeout, timeout_left = 0; - - static const struct watchdog_info ident = { - .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT, - .firmware_version = 0, - .identity = "Hardware Watchdog for PNX833x", - }; - - switch (cmd) { - default: - return -ENOTTY; - - case WDIOC_GETSUPPORT: - if (copy_to_user((struct watchdog_info *)arg, - &ident, sizeof(ident))) - return -EFAULT; - return 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, (int *)arg); - - case WDIOC_SETOPTIONS: - if (get_user(options, (int *)arg)) - return -EFAULT; - - if (options & WDIOS_DISABLECARD) - pnx833x_wdt_stop(); - - if (options & WDIOS_ENABLECARD) - pnx833x_wdt_start(); - - return 0; - - case WDIOC_KEEPALIVE: - pnx833x_wdt_ping(); - return 0; - - case WDIOC_SETTIMEOUT: - { - if (get_user(new_timeout, (int *)arg)) - return -EFAULT; - - pnx833x_wdt_timeout = new_timeout; - PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = new_timeout; - return put_user(new_timeout, (int *)arg); - } - - case WDIOC_GETTIMEOUT: - timeout = PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG_COMPARE); - return put_user(timeout, (int *)arg); - - case WDIOC_GETTIMELEFT: - timeout_left = PNX833X_REG(PNX833X_CONFIG + - PNX833X_CONFIG_CPU_WATCHDOG); - return put_user(timeout_left, (int *)arg); - - } -} - -static int pnx833x_wdt_notify_sys(struct notifier_block *this, - unsigned long code, void *unused) -{ - if (code == SYS_DOWN || code == SYS_HALT) - pnx833x_wdt_stop(); /* Turn the WDT 
off */ - - return NOTIFY_DONE; -} - -static const struct file_operations pnx833x_wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .write = pnx833x_wdt_write, - .unlocked_ioctl = pnx833x_wdt_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = pnx833x_wdt_open, - .release = pnx833x_wdt_release, -}; - -static struct miscdevice pnx833x_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &pnx833x_wdt_fops, -}; - -static struct notifier_block pnx833x_wdt_notifier = { - .notifier_call = pnx833x_wdt_notify_sys, -}; - -static int __init watchdog_init(void) -{ - int ret, cause; - - /* Lets check the reason for the reset.*/ - cause = PNX833X_REG(PNX833X_RESET); - /*If bit 31 is set then watchdog was cause of reset.*/ - if (cause & 0x80000000) { - pr_info("The system was previously reset due to the watchdog firing - please investigate...\n"); - } - - ret = register_reboot_notifier(&pnx833x_wdt_notifier); - if (ret) { - pr_err("cannot register reboot notifier (err=%d)\n", ret); - return ret; - } - - ret = misc_register(&pnx833x_wdt_miscdev); - if (ret) { - pr_err("cannot register miscdev on minor=%d (err=%d)\n", - WATCHDOG_MINOR, ret); - unregister_reboot_notifier(&pnx833x_wdt_notifier); - return ret; - } - - pr_info("Hardware Watchdog Timer for PNX833x: Version 0.1\n"); - - if (start_enabled) - pnx833x_wdt_start(); - - return 0; -} - -static void __exit watchdog_exit(void) -{ - misc_deregister(&pnx833x_wdt_miscdev); - unregister_reboot_notifier(&pnx833x_wdt_notifier); -} - -module_init(watchdog_init); -module_exit(watchdog_exit); - -MODULE_AUTHOR("Daniel Laird/Andre McCurdy"); -MODULE_DESCRIPTION("Hardware Watchdog Device for PNX833x"); -MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index ab7465d186fd..7cf0f2ec649b 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -148,10 +148,17 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, */ 
wmb(); - msleep(150); + mdelay(150); return 0; } +static int qcom_wdt_is_running(struct watchdog_device *wdd) +{ + struct qcom_wdt *wdt = to_qcom_wdt(wdd); + + return (readl(wdt_addr(wdt, WDT_EN)) & QCOM_WDT_ENABLE); +} + static const struct watchdog_ops qcom_wdt_ops = { .start = qcom_wdt_start, .stop = qcom_wdt_stop, @@ -294,6 +301,17 @@ static int qcom_wdt_probe(struct platform_device *pdev) wdt->wdd.timeout = min(wdt->wdd.max_timeout, 30U); watchdog_init_timeout(&wdt->wdd, 0, dev); + /* + * If WDT is already running, call WDT start which + * will stop the WDT, set timeouts as bootloader + * might use different ones and set running bit + * to inform the WDT subsystem to ping the WDT + */ + if (qcom_wdt_is_running(&wdt->wdd)) { + qcom_wdt_start(&wdt->wdd); + set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); + } + ret = devm_watchdog_register_device(dev, &wdt->wdd); if (ret) return ret; diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index 836319cbaca9..359302f71f7e 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -227,8 +227,10 @@ static int rti_wdt_probe(struct platform_device *pdev) pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); - if (ret) + if (ret) { + pm_runtime_put_noidle(dev); return dev_err_probe(dev, ret, "runtime pm failed\n"); + } platform_set_drvdata(pdev, wdt); diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c index 04483d6453d6..13db71e16583 100644 --- a/drivers/watchdog/sbc_fitpc2_wdt.c +++ b/drivers/watchdog/sbc_fitpc2_wdt.c @@ -78,7 +78,7 @@ static int fitpc2_wdt_open(struct inode *inode, struct file *file) return stream_open(inode, file); } -static ssize_t fitpc2_wdt_write(struct file *file, const char *data, +static ssize_t fitpc2_wdt_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { size_t i; @@ -125,16 +125,16 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case WDIOC_GETSUPPORT: - ret = 
copy_to_user((struct watchdog_info *)arg, &ident, + ret = copy_to_user((struct watchdog_info __user *)arg, &ident, sizeof(ident)) ? -EFAULT : 0; break; case WDIOC_GETSTATUS: - ret = put_user(0, (int *)arg); + ret = put_user(0, (int __user *)arg); break; case WDIOC_GETBOOTSTATUS: - ret = put_user(0, (int *)arg); + ret = put_user(0, (int __user *)arg); break; case WDIOC_KEEPALIVE: @@ -143,7 +143,7 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd, break; case WDIOC_SETTIMEOUT: - ret = get_user(time, (int *)arg); + ret = get_user(time, (int __user *)arg); if (ret) break; @@ -157,7 +157,7 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd, fallthrough; case WDIOC_GETTIMEOUT: - ret = put_user(margin, (int *)arg); + ret = put_user(margin, (int __user *)arg); break; } diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index 190d26e2e75f..958dc32a708f 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -291,6 +291,7 @@ sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id) set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); } + watchdog_stop_on_reboot(&wdt->wdd); ret = watchdog_register_device(&wdt->wdd); if (ret) goto err; diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c index 65cb55f3916f..4e689b6ff141 100644 --- a/drivers/watchdog/sprd_wdt.c +++ b/drivers/watchdog/sprd_wdt.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -53,7 +54,7 @@ #define SPRD_WDT_CNT_HIGH_SHIFT 16 #define SPRD_WDT_LOW_VALUE_MASK GENMASK(15, 0) -#define SPRD_WDT_LOAD_TIMEOUT 1000 +#define SPRD_WDT_LOAD_TIMEOUT 11 struct sprd_wdt { void __iomem *base; @@ -108,6 +109,23 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout, u32 tmr_step = timeout * SPRD_WDT_CNT_STEP; u32 prtmr_step = pretimeout * SPRD_WDT_CNT_STEP; + /* + * Checking busy bit to make sure the previous loading operation is + * done. 
According to the specification, the busy bit would be set + * after a new loading operation and last 2 or 3 RTC clock + * cycles (about 60us~92us). + */ + do { + val = readl_relaxed(wdt->base + SPRD_WDT_INT_RAW); + if (!(val & SPRD_WDT_LD_BUSY_BIT)) + break; + + usleep_range(10, 100); + } while (delay_cnt++ < SPRD_WDT_LOAD_TIMEOUT); + + if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT) + return -EBUSY; + sprd_wdt_unlock(wdt->base); writel_relaxed((tmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) & SPRD_WDT_LOW_VALUE_MASK, wdt->base + SPRD_WDT_LOAD_HIGH); @@ -120,20 +138,6 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout, wdt->base + SPRD_WDT_IRQ_LOAD_LOW); sprd_wdt_lock(wdt->base); - /* - * Waiting the load value operation done, - * it needs two or three RTC clock cycles. - */ - do { - val = readl_relaxed(wdt->base + SPRD_WDT_INT_RAW); - if (!(val & SPRD_WDT_LD_BUSY_BIT)) - break; - - cpu_relax(); - } while (delay_cnt++ < SPRD_WDT_LOAD_TIMEOUT); - - if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT) - return -EBUSY; return 0; } @@ -345,15 +349,10 @@ static int __maybe_unused sprd_wdt_pm_resume(struct device *dev) if (ret) return ret; - if (watchdog_active(&wdt->wdd)) { + if (watchdog_active(&wdt->wdd)) ret = sprd_wdt_start(&wdt->wdd); - if (ret) { - sprd_wdt_disable(wdt); - return ret; - } - } - return 0; + return ret; } static const struct dev_pm_ops sprd_wdt_pm_ops = { diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c index 25188d6bbe15..a3436c296c97 100644 --- a/drivers/watchdog/stm32_iwdg.c +++ b/drivers/watchdog/stm32_iwdg.c @@ -162,18 +162,15 @@ static int stm32_iwdg_clk_init(struct platform_device *pdev, u32 ret; wdt->clk_lsi = devm_clk_get(dev, "lsi"); - if (IS_ERR(wdt->clk_lsi)) { - dev_err(dev, "Unable to get lsi clock\n"); - return PTR_ERR(wdt->clk_lsi); - } + if (IS_ERR(wdt->clk_lsi)) + return dev_err_probe(dev, PTR_ERR(wdt->clk_lsi), "Unable to get lsi clock\n"); /* optional peripheral clock */ if (wdt->data->has_pclk) { wdt->clk_pclk = 
devm_clk_get(dev, "pclk"); - if (IS_ERR(wdt->clk_pclk)) { - dev_err(dev, "Unable to get pclk clock\n"); - return PTR_ERR(wdt->clk_pclk); - } + if (IS_ERR(wdt->clk_pclk)) + return dev_err_probe(dev, PTR_ERR(wdt->clk_pclk), + "Unable to get pclk clock\n"); ret = clk_prepare_enable(wdt->clk_pclk); if (ret) { diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 423844757812..0e9a99559609 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -267,15 +267,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd) } if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { - wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; + if (!wdd->ops->stop) + pr_warn("watchdog%d: stop_on_reboot not supported\n", wdd->id); + else { + wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; - ret = register_reboot_notifier(&wdd->reboot_nb); - if (ret) { - pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", - wdd->id, ret); - watchdog_dev_unregister(wdd); - ida_simple_remove(&watchdog_ida, id); - return ret; + ret = register_reboot_notifier(&wdd->reboot_nb); + if (ret) { + pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", + wdd->id, ret); + watchdog_dev_unregister(wdd); + ida_simple_remove(&watchdog_ida, id); + return ret; + } } } diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 3065dd670a18..cec7917790e5 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -34,9 +34,9 @@ struct wdat_instruction { * @period: How long is one watchdog period in ms * @stopped_in_sleep: Is this watchdog stopped by the firmware in S1-S5 * @stopped: Was the watchdog stopped by the driver in suspend - * @actions: An array of instruction lists indexed by an action number from - * the WDAT table. There can be %NULL entries for not implemented - * actions. + * @instructions: An array of instruction lists indexed by an action number from + * the WDAT table. 
There can be %NULL entries for not implemented + * actions. */ struct wdat_wdt { struct platform_device *pdev; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6038c4c35db5..a8030332a191 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -95,7 +95,8 @@ struct irq_info { struct list_head list; struct list_head eoi_list; short refcnt; - short spurious_cnt; + u8 spurious_cnt; + u8 is_accounted; enum xen_irq_type type; /* type */ unsigned irq; evtchn_port_t evtchn; /* event channel */ @@ -161,6 +162,9 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping */ static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; +/* Event channel distribution data */ +static atomic_t channels_on_cpu[NR_CPUS]; + static int **evtchn_to_irq; #ifdef CONFIG_X86 static unsigned long *pirq_eoi_map; @@ -257,6 +261,32 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info) irq_set_chip_data(irq, info); } +/* Per CPU channel accounting */ +static void channels_on_cpu_dec(struct irq_info *info) +{ + if (!info->is_accounted) + return; + + info->is_accounted = 0; + + if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) + return; + + WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0)); +} + +static void channels_on_cpu_inc(struct irq_info *info) +{ + if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) + return; + + if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1, + INT_MAX))) + return; + + info->is_accounted = 1; +} + /* Constructors for packed IRQ information. 
*/ static int xen_irq_info_common_setup(struct irq_info *info, unsigned irq, @@ -339,6 +369,7 @@ static void xen_irq_info_cleanup(struct irq_info *info) { set_evtchn_to_irq(info->evtchn, -1); info->evtchn = 0; + channels_on_cpu_dec(info); } /* @@ -433,18 +464,25 @@ static bool pirq_needs_eoi_flag(unsigned irq) return info->u.pirq.flags & PIRQ_NEEDS_EOI; } -static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu) +static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, + bool force_affinity) { int irq = get_evtchn_to_irq(evtchn); struct irq_info *info = info_for_irq(irq); BUG_ON(irq == -1); -#ifdef CONFIG_SMP - cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu)); -#endif + + if (IS_ENABLED(CONFIG_SMP) && force_affinity) { + cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu)); + cpumask_copy(irq_get_effective_affinity_mask(irq), + cpumask_of(cpu)); + } + xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu); + channels_on_cpu_dec(info); info->cpu = cpu; + channels_on_cpu_inc(info); } /** @@ -523,8 +561,10 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) return; if (spurious) { - if ((1 << info->spurious_cnt) < (HZ << 2)) - info->spurious_cnt++; + if ((1 << info->spurious_cnt) < (HZ << 2)) { + if (info->spurious_cnt != 0xFF) + info->spurious_cnt++; + } if (info->spurious_cnt > 1) { delay = 1 << (info->spurious_cnt - 2); if (delay > HZ) @@ -615,11 +655,6 @@ static void xen_irq_init(unsigned irq) { struct irq_info *info; -#ifdef CONFIG_SMP - /* By default all event channels notify CPU#0. */ - cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); -#endif - info = kzalloc(sizeof(*info), GFP_KERNEL); if (info == NULL) panic("Unable to allocate metadata for IRQ%d\n", irq); @@ -628,6 +663,11 @@ static void xen_irq_init(unsigned irq) info->refcnt = -1; set_info_for_irq(irq, info); + /* + * Interrupt affinity setting can be immediate. No point + * in delaying it until an interrupt is handled. 
+ */ + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); INIT_LIST_HEAD(&info->eoi_list); list_add_tail(&info->list, &xen_irq_list_head); @@ -739,18 +779,7 @@ static void eoi_pirq(struct irq_data *data) if (!VALID_EVTCHN(evtchn)) return; - if (unlikely(irqd_is_setaffinity_pending(data)) && - likely(!irqd_irq_disabled(data))) { - int masked = test_and_set_mask(evtchn); - - clear_evtchn(evtchn); - - irq_move_masked_irq(data); - - if (!masked) - unmask_evtchn(evtchn); - } else - clear_evtchn(evtchn); + clear_evtchn(evtchn); if (pirq_needs_eoi(data->irq)) { rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); @@ -794,7 +823,7 @@ static unsigned int __startup_pirq(unsigned int irq) goto err; info->evtchn = evtchn; - bind_evtchn_to_cpu(evtchn, 0); + bind_evtchn_to_cpu(evtchn, 0, false); rc = xen_evtchn_port_setup(evtchn); if (rc) @@ -1113,8 +1142,14 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) irq = ret; goto out; } - /* New interdomain events are bound to VCPU 0. */ - bind_evtchn_to_cpu(evtchn, 0); + /* + * New interdomain events are initially bound to vCPU0 This + * is required to setup the event channel in the first + * place and also important for UP guests because the + * affinity setting is not invoked on them so nothing would + * bind the channel. 
+ */ + bind_evtchn_to_cpu(evtchn, 0, false); } else { struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_EVTCHN); @@ -1132,12 +1167,6 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn) } EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); -int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) -{ - return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip); -} -EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); - static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; @@ -1168,7 +1197,11 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) irq = ret; goto out; } - bind_evtchn_to_cpu(evtchn, cpu); + /* + * Force the affinity mask to the target CPU so proc shows + * the correct target. + */ + bind_evtchn_to_cpu(evtchn, cpu, true); } else { struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_IPI); @@ -1281,7 +1314,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) goto out; } - bind_evtchn_to_cpu(evtchn, cpu); + /* + * Force the affinity mask for percpu interrupts so proc + * shows the correct target. + */ + bind_evtchn_to_cpu(evtchn, cpu, percpu); } else { struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_VIRQ); @@ -1646,9 +1683,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) mutex_unlock(&irq_mapping_update_lock); - bind_evtchn_to_cpu(evtchn, info->cpu); - /* This will be deferred until interrupt is processed */ - irq_set_affinity(irq, cpumask_of(info->cpu)); + bind_evtchn_to_cpu(evtchn, info->cpu, false); /* Unmask the event channel. */ enable_irq(irq); @@ -1682,7 +1717,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) * it, but don't do the xenlinux-level rebind in that case. 
*/ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu); + bind_evtchn_to_cpu(evtchn, tcpu, false); if (!masked) unmask_evtchn(evtchn); @@ -1690,27 +1725,47 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) return 0; } +/* + * Find the CPU within @dest mask which has the least number of channels + * assigned. This is not precise as the per cpu counts can be modified + * concurrently. + */ +static unsigned int select_target_cpu(const struct cpumask *dest) +{ + unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX; + + for_each_cpu_and(cpu, dest, cpu_online_mask) { + unsigned int curch = atomic_read(&channels_on_cpu[cpu]); + + if (curch < minch) { + minch = curch; + best_cpu = cpu; + } + } + + /* + * Catch the unlikely case that dest contains no online CPUs. Can't + * recurse. + */ + if (best_cpu == UINT_MAX) + return select_target_cpu(cpu_online_mask); + + return best_cpu; +} + static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, bool force) { - unsigned tcpu = cpumask_first_and(dest, cpu_online_mask); - int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); + unsigned int tcpu = select_target_cpu(dest); + int ret; + ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); if (!ret) irq_data_update_effective_affinity(data, cpumask_of(tcpu)); return ret; } -/* To be called with desc->lock held. 
*/ -int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu) -{ - struct irq_data *d = irq_desc_get_irq_data(desc); - - return set_affinity_irq(d, cpumask_of(tcpu), false); -} -EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn); - static void enable_dynirq(struct irq_data *data) { evtchn_port_t evtchn = evtchn_from_irq(data->irq); @@ -1734,18 +1789,7 @@ static void ack_dynirq(struct irq_data *data) if (!VALID_EVTCHN(evtchn)) return; - if (unlikely(irqd_is_setaffinity_pending(data)) && - likely(!irqd_irq_disabled(data))) { - int masked = test_and_set_mask(evtchn); - - clear_evtchn(evtchn); - - irq_move_masked_irq(data); - - if (!masked) - unmask_evtchn(evtchn); - } else - clear_evtchn(evtchn); + clear_evtchn(evtchn); } static void mask_ack_dynirq(struct irq_data *data) @@ -1830,7 +1874,8 @@ static void restore_cpu_virqs(unsigned int cpu) /* Record the new mapping. */ (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq); - bind_evtchn_to_cpu(evtchn, cpu); + /* The affinity mask is still valid */ + bind_evtchn_to_cpu(evtchn, cpu, false); } } @@ -1855,7 +1900,8 @@ static void restore_cpu_ipis(unsigned int cpu) /* Record the new mapping. */ (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); - bind_evtchn_to_cpu(evtchn, cpu); + /* The affinity mask is still valid */ + bind_evtchn_to_cpu(evtchn, cpu, false); } } @@ -1938,8 +1984,12 @@ void xen_irq_resume(void) xen_evtchn_resume(); /* No IRQ <-> event-channel mappings. 
*/ - list_for_each_entry(info, &xen_irq_list_head, list) - info->evtchn = 0; /* zap event-channel binding */ + list_for_each_entry(info, &xen_irq_list_head, list) { + /* Zap event-channel binding */ + info->evtchn = 0; + /* Adjust accounting */ + channels_on_cpu_dec(info); + } clear_evtchn_to_irq_all(); diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 5dc016d68f83..a7a85719a8c8 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -421,36 +421,6 @@ static void evtchn_unbind_from_user(struct per_user_data *u, del_evtchn(u, evtchn); } -static DEFINE_PER_CPU(int, bind_last_selected_cpu); - -static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn) -{ - unsigned int selected_cpu, irq; - struct irq_desc *desc; - unsigned long flags; - - irq = irq_from_evtchn(evtchn); - desc = irq_to_desc(irq); - - if (!desc) - return; - - raw_spin_lock_irqsave(&desc->lock, flags); - selected_cpu = this_cpu_read(bind_last_selected_cpu); - selected_cpu = cpumask_next_and(selected_cpu, - desc->irq_common_data.affinity, cpu_online_mask); - - if (unlikely(selected_cpu >= nr_cpu_ids)) - selected_cpu = cpumask_first_and(desc->irq_common_data.affinity, - cpu_online_mask); - - this_cpu_write(bind_last_selected_cpu, selected_cpu); - - /* unmask expects irqs to be disabled */ - xen_set_affinity_evtchn(desc, selected_cpu); - raw_spin_unlock_irqrestore(&desc->lock, flags); -} - static long evtchn_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -508,10 +478,8 @@ static long evtchn_ioctl(struct file *file, break; rc = evtchn_bind_to_user(u, bind_interdomain.local_port); - if (rc == 0) { + if (rc == 0) rc = bind_interdomain.local_port; - evtchn_bind_interdom_next_vcpu(rc); - } break; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 9e56ee1f2652..9293045e128c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1,9 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/block_dev.c - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 
(C) 2001 Andrea Arcangeli SuSE + * Copyright (C) 2016 - 2020 Christoph Hellwig */ #include diff --git a/fs/dcache.c b/fs/dcache.c index ea0485861d93..97e81a844a96 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -793,10 +793,17 @@ static inline bool fast_dput(struct dentry *dentry) * a reference to the dentry and change that, but * our work is done - we can leave the dentry * around with a zero refcount. + * + * Nevertheless, there are two cases that we should kill + * the dentry anyway. + * 1. free disconnected dentries as soon as their refcount + * reached zero. + * 2. free dentries if they should not be cached. */ smp_rmb(); d_flags = READ_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | + DCACHE_DISCONNECTED | DCACHE_DONTCACHE; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1d640b145637..f45f9feebe59 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -185,7 +185,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t start, tmp; - J_ASSERT_BH(bh, buffer_locked(bh)); + ASSERT(buffer_locked(bh)); /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. 
*/ diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 8e6ca23ed172..4666b55b736e 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -176,12 +176,10 @@ static int ext4_protect_reserved_inode(struct super_block *sb, err = add_system_zone(system_blks, map.m_pblk, n, ino); if (err < 0) { if (err == -EFSCORRUPTED) { - __ext4_error(sb, __func__, __LINE__, - -err, map.m_pblk, - "blocks %llu-%llu from inode %u overlap system zone", - map.m_pblk, - map.m_pblk + map.m_len - 1, - ino); + EXT4_ERROR_INODE_ERR(inode, -err, + "blocks %llu-%llu from inode overlap system zone", + map.m_pblk, + map.m_pblk + map.m_len - 1); } break; } @@ -206,7 +204,7 @@ static void ext4_destroy_system_zone(struct rcu_head *rcu) * * The update of system_blks pointer in this function is protected by * sb->s_umount semaphore. However we have to be careful as we can be - * racing with ext4_data_block_valid() calls reading system_blks rbtree + * racing with ext4_inode_block_valid() calls reading system_blks rbtree * protected only by RCU. That's why we first build the rbtree and then * swap it in place. */ @@ -258,7 +256,7 @@ int ext4_setup_system_zone(struct super_block *sb) /* * System blks rbtree complete, announce it once to prevent racing - * with ext4_data_block_valid() accessing the rbtree at the same + * with ext4_inode_block_valid() accessing the rbtree at the same * time. */ rcu_assign_pointer(sbi->s_system_blks, system_blks); @@ -278,7 +276,7 @@ err: * * The update of system_blks pointer in this function is protected by * sb->s_umount semaphore. However we have to be careful as we can be - * racing with ext4_data_block_valid() calls reading system_blks rbtree + * racing with ext4_inode_block_valid() calls reading system_blks rbtree * protected only by RCU. So we first clear the system_blks pointer and * then free the rbtree only after RCU grace period expires. 
*/ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c21deb2e8a97..dcd8e03b6caf 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -98,6 +98,16 @@ #define ext_debug(ino, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif +#define ASSERT(assert) \ +do { \ + if (unlikely(!(assert))) { \ + printk(KERN_EMERG \ + "Assertion failure in %s() at %s:%d: '%s'\n", \ + __func__, __FILE__, __LINE__, #assert); \ + BUG(); \ + } \ +} while (0) + /* data type for block offset of block group */ typedef int ext4_grpblk_t; @@ -1619,6 +1629,27 @@ struct ext4_sb_info { errseq_t s_bdev_wb_err; spinlock_t s_bdev_wb_lock; + /* Information about errors that happened during this mount */ + spinlock_t s_error_lock; + int s_add_error_count; + int s_first_error_code; + __u32 s_first_error_line; + __u32 s_first_error_ino; + __u64 s_first_error_block; + const char *s_first_error_func; + time64_t s_first_error_time; + int s_last_error_code; + __u32 s_last_error_line; + __u32 s_last_error_ino; + __u64 s_last_error_block; + const char *s_last_error_func; + time64_t s_last_error_time; + /* + * If we are in a context where we cannot update error information in + * the on-disk superblock, we queue this work to do it. 
+ */ + struct work_struct s_error_work; + /* Ext4 fast commit stuff */ atomic_t s_fc_subtid; atomic_t s_fc_ineligible_updates; @@ -1858,7 +1889,6 @@ static inline bool ext4_verity_in_progress(struct inode *inode) #define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ #define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ -#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV #define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV #define EXT4_GOOD_OLD_INODE_SIZE 128 @@ -2998,9 +3028,9 @@ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, ext4_group_t block_group, unsigned int flags); -extern __printf(6, 7) -void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64, - const char *, ...); +extern __printf(7, 8) +void __ext4_error(struct super_block *, const char *, unsigned int, bool, + int, __u64, const char *, ...); extern __printf(6, 7) void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, int, const char *, ...); @@ -3009,9 +3039,6 @@ void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, const char *, ...); extern void __ext4_std_error(struct super_block *, const char *, unsigned int, int); -extern __printf(5, 6) -void __ext4_abort(struct super_block *, const char *, unsigned int, int, - const char *, ...); extern __printf(4, 5) void __ext4_warning(struct super_block *, const char *, unsigned int, const char *, ...); @@ -3041,6 +3068,9 @@ void __ext4_grp_locked_error(const char *, unsigned int, #define EXT4_ERROR_FILE(file, block, fmt, a...) \ ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a) +#define ext4_abort(sb, err, fmt, a...) \ + __ext4_error((sb), __func__, __LINE__, true, (err), 0, (fmt), ## a) + #ifdef CONFIG_PRINTK #define ext4_error_inode(inode, func, line, block, fmt, ...) \ @@ -3051,11 +3081,11 @@ void __ext4_grp_locked_error(const char *, unsigned int, #define ext4_error_file(file, func, line, block, fmt, ...) 
\ __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) #define ext4_error(sb, fmt, ...) \ - __ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__) + __ext4_error((sb), __func__, __LINE__, false, 0, 0, (fmt), \ + ##__VA_ARGS__) #define ext4_error_err(sb, err, fmt, ...) \ - __ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__) -#define ext4_abort(sb, err, fmt, ...) \ - __ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__) + __ext4_error((sb), __func__, __LINE__, false, (err), 0, (fmt), \ + ##__VA_ARGS__) #define ext4_warning(sb, fmt, ...) \ __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) #define ext4_warning_inode(inode, fmt, ...) \ @@ -3088,17 +3118,12 @@ do { \ #define ext4_error(sb, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_error(sb, "", 0, 0, 0, " "); \ + __ext4_error(sb, "", 0, false, 0, 0, " "); \ } while (0) #define ext4_error_err(sb, err, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ - __ext4_error(sb, "", 0, err, 0, " "); \ -} while (0) -#define ext4_abort(sb, err, fmt, ...) \ -do { \ - no_printk(fmt, ##__VA_ARGS__); \ - __ext4_abort(sb, "", 0, err, " "); \ + __ext4_error(sb, "", 0, false, err, 0, " "); \ } while (0) #define ext4_warning(sb, fmt, ...) 
\ do { \ @@ -3407,6 +3432,21 @@ static inline void ext4_unlock_group(struct super_block *sb, spin_unlock(ext4_group_lock_ptr(sb, group)); } +#ifdef CONFIG_QUOTA +static inline bool ext4_quota_capable(struct super_block *sb) +{ + return (test_opt(sb, QUOTA) || ext4_has_feature_quota(sb)); +} + +static inline bool ext4_is_quota_journalled(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + return (ext4_has_feature_quota(sb) || + sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]); +} +#endif + /* * Block validity checking */ @@ -3652,7 +3692,6 @@ extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc, bool keep_towrite); extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end); extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0fd0c42a4f7d..1a0a827a7f34 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -296,8 +296,8 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); - __ext4_abort(inode->i_sb, where, line, -err, - "error %d when attempting revoke", err); + __ext4_error(inode->i_sb, where, line, true, -err, 0, + "error %d when attempting revoke", err); } BUFFER_TRACE(bh, "exit"); return err; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 00dc668e052b..a124c68b0c75 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -86,17 +86,14 @@ #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only data block */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ? 1 : 0) +#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((ext4_quota_capable(sb)) ? 
1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ -#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ?\ +#define EXT4_QUOTA_INIT_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_INIT_REWRITE) : 0) -#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - ext4_has_feature_quota(sb)) ?\ +#define EXT4_QUOTA_DEL_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_DEL_REWRITE) : 0) #else diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 17d7096b3212..3960b7ec3ab7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5815,8 +5815,8 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, int ret; path = ext4_find_extent(inode, start, NULL, 0); - if (!path) - return -EINVAL; + if (IS_ERR(path)) + return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) { ret = -EFSCORRUPTED; @@ -5988,7 +5988,6 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) kfree(path); break; } - ex = path2[path2->p_depth].p_ext; for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { cmp1 = cmp2 = 0; if (i <= path->p_depth) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index f2033e13a273..4fcc21c25e79 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -103,8 +103,69 @@ * * Replay code should thus check for all the valid tails in the FC area. * + * Fast Commit Replay Idempotence + * ------------------------------ + * + * Fast commits tags are idempotent in nature provided the recovery code follows + * certain rules. The guiding principle that the commit path follows while + * committing is that it stores the result of a particular operation instead of + * storing the procedure. + * + * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a' + * was associated with inode 10. 
During fast commit, instead of storing this + * operation as a procedure "rename a to b", we store the resulting file system + * state as a "series" of outcomes: + * + * - Link dirent b to inode 10 + * - Unlink dirent a + * - Inode <10> with valid refcount + * + * Now when recovery code runs, it needs "enforce" this state on the file + * system. This is what guarantees idempotence of fast commit replay. + * + * Let's take an example of a procedure that is not idempotent and see how fast + * commits make it idempotent. Consider following sequence of operations: + * + * rm A; mv B A; read A + * (x) (y) (z) + * + * (x), (y) and (z) are the points at which we can crash. If we store this + * sequence of operations as is then the replay is not idempotent. Let's say + * while in replay, we crash at (z). During the second replay, file A (which was + * actually created as a result of "mv B A" operation) would get deleted. Thus, + * file named A would be absent when we try to read A. So, this sequence of + * operations is not idempotent. However, as mentioned above, instead of storing + * the procedure fast commits store the outcome of each procedure. Thus the fast + * commit log for above procedure would be as follows: + * + * (Let's assume dirent A was linked to inode 10 and dirent B was linked to + * inode 11 before the replay) + * + * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11] + * (w) (x) (y) (z) + * + * If we crash at (z), we will have file A linked to inode 11. During the second + * replay, we will remove file A (inode 11). But we will create it back and make + * it point to inode 11. We won't find B, so we'll just skip that step. At this + * point, the refcount for inode 11 is not reliable, but that gets fixed by the + * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled + * similarly. Thus, by converting a non-idempotent procedure into a series of + * idempotent outcomes, fast commits ensured idempotence during the replay. 
+ * * TODOs * ----- + * + * 0) Fast commit replay path hardening: Fast commit replay code should use + * journal handles to make sure all the updates it does during the replay + * path are atomic. With that if we crash during fast commit replay, after + * trying to do recovery again, we will find a file system where fast commit + * area is invalid (because new full commit would be found). In order to deal + * with that, fast commit replay code should ensure that the "FC_REPLAY" + * superblock state is persisted before starting the replay, so that after + * the crash, fast commit recovery code can look at that flag and perform + * fast commit recovery even if that area is invalidated by later full + * commits. + * * 1) Make fast commit atomic updates more fine grained. Today, a fast commit * eligible update must be protected within ext4_fc_start_update() and * ext4_fc_stop_update(). These routines are called at much higher @@ -1220,18 +1281,6 @@ static void ext4_fc_cleanup(journal_t *journal, int full) /* Ext4 Replay Path Routines */ -/* Get length of a particular tlv */ -static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl) -{ - return le16_to_cpu(tl->fc_len); -} - -/* Get a pointer to "value" of a tlv */ -static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl) -{ - return (u8 *)tl + sizeof(*tl); -} - /* Helper struct for dentry replay routines */ struct dentry_info_args { int parent_ino, dname_len, ino, inode_len; @@ -1770,32 +1819,6 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) return 0; } -static inline const char *tag2str(u16 tag) -{ - switch (tag) { - case EXT4_FC_TAG_LINK: - return "TAG_ADD_ENTRY"; - case EXT4_FC_TAG_UNLINK: - return "TAG_DEL_ENTRY"; - case EXT4_FC_TAG_ADD_RANGE: - return "TAG_ADD_RANGE"; - case EXT4_FC_TAG_CREAT: - return "TAG_CREAT_DENTRY"; - case EXT4_FC_TAG_DEL_RANGE: - return "TAG_DEL_RANGE"; - case EXT4_FC_TAG_INODE: - return "TAG_INODE"; - case EXT4_FC_TAG_PAD: - return "TAG_PAD"; - case 
EXT4_FC_TAG_TAIL: - return "TAG_TAIL"; - case EXT4_FC_TAG_HEAD: - return "TAG_HEAD"; - default: - return "TAG_ERROR"; - } -} - static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) { struct ext4_fc_replay_state *state; diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 3a6e5a1fa1b8..b77f70f55a62 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -3,6 +3,11 @@ #ifndef __FAST_COMMIT_H__ #define __FAST_COMMIT_H__ +/* + * Note this file is present in e2fsprogs/lib/ext2fs/fast_commit.h and + * linux/fs/ext4/fast_commit.h. These file should always be byte identical. + */ + /* Fast commit tags */ #define EXT4_FC_TAG_ADD_RANGE 0x0001 #define EXT4_FC_TAG_DEL_RANGE 0x0002 @@ -50,7 +55,7 @@ struct ext4_fc_del_range { struct ext4_fc_dentry_info { __le32 fc_parent_ino; __le32 fc_ino; - u8 fc_dname[0]; + __u8 fc_dname[0]; }; /* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */ @@ -65,19 +70,6 @@ struct ext4_fc_tail { __le32 fc_crc; }; -/* - * In memory list of dentry updates that are performed on the file - * system used by fast commit code. - */ -struct ext4_fc_dentry_update { - int fcd_op; /* Type of update create / unlink / link */ - int fcd_parent; /* Parent inode number */ - int fcd_ino; /* Inode number */ - struct qstr fcd_name; /* Dirent name */ - unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ - struct list_head fcd_list; -}; - /* * Fast commit reason codes */ @@ -107,6 +99,20 @@ enum { EXT4_FC_REASON_MAX }; +#ifdef __KERNEL__ +/* + * In memory list of dentry updates that are performed on the file + * system used by fast commit code. 
+ */ +struct ext4_fc_dentry_update { + int fcd_op; /* Type of update create / unlink / link */ + int fcd_parent; /* Parent inode number */ + int fcd_ino; /* Inode number */ + struct qstr fcd_name; /* Dirent name */ + unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ + struct list_head fcd_list; +}; + struct ext4_fc_stats { unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX]; unsigned long fc_num_commits; @@ -145,13 +151,51 @@ struct ext4_fc_replay_state { }; #define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1) +#endif #define fc_for_each_tl(__start, __end, __tl) \ - for (tl = (struct ext4_fc_tl *)start; \ - (u8 *)tl < (u8 *)end; \ - tl = (struct ext4_fc_tl *)((u8 *)tl + \ + for (tl = (struct ext4_fc_tl *)(__start); \ + (__u8 *)tl < (__u8 *)(__end); \ + tl = (struct ext4_fc_tl *)((__u8 *)tl + \ sizeof(struct ext4_fc_tl) + \ + le16_to_cpu(tl->fc_len))) +static inline const char *tag2str(__u16 tag) +{ + switch (tag) { + case EXT4_FC_TAG_LINK: + return "ADD_ENTRY"; + case EXT4_FC_TAG_UNLINK: + return "DEL_ENTRY"; + case EXT4_FC_TAG_ADD_RANGE: + return "ADD_RANGE"; + case EXT4_FC_TAG_CREAT: + return "CREAT_DENTRY"; + case EXT4_FC_TAG_DEL_RANGE: + return "DEL_RANGE"; + case EXT4_FC_TAG_INODE: + return "INODE"; + case EXT4_FC_TAG_PAD: + return "PAD"; + case EXT4_FC_TAG_TAIL: + return "TAIL"; + case EXT4_FC_TAG_HEAD: + return "HEAD"; + default: + return "ERROR"; + } +} + +/* Get length of a particular tlv */ +static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl) +{ + return le16_to_cpu(tl->fc_len); +} + +/* Get a pointer to "value" of a tlv */ +static inline __u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl) +{ + return (__u8 *)tl + sizeof(*tl); +} #endif /* __FAST_COMMIT_H__ */ diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a42ca95840f2..113bfb023a4a 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -136,7 +136,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if 
(unlikely(ext4_forced_shutdown(sbi))) return -EIO; - J_ASSERT(ext4_journal_current_handle() == NULL); + ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file_enter(file, datasync); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 05efa682bc2f..1223a18c3ff9 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -534,8 +534,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t first_block = 0; trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); - J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); - J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); + ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); + ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, map->m_lblk, offsets, &blocks_to_boundary); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5c705670e93a..d6bad40d8752 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -176,6 +176,7 @@ void ext4_evict_inode(struct inode *inode) */ int extra_credits = 6; struct ext4_xattr_inode_array *ea_inode_array = NULL; + bool freeze_protected = false; trace_ext4_evict_inode(inode); @@ -233,9 +234,14 @@ void ext4_evict_inode(struct inode *inode) /* * Protect us against freezing - iput() caller didn't have to have any - * protection against it + * protection against it. When we are in a running transaction though, + * we are already protected against freezing and we cannot grab further + * protection due to lock ordering constraints. */ - sb_start_intwrite(inode->i_sb); + if (!ext4_journal_current_handle()) { + sb_start_intwrite(inode->i_sb); + freeze_protected = true; + } if (!IS_NOQUOTA(inode)) extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); @@ -254,7 +260,8 @@ void ext4_evict_inode(struct inode *inode) * cleaned up. 
*/ ext4_orphan_del(NULL, inode); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); goto no_delete; } @@ -295,7 +302,8 @@ void ext4_evict_inode(struct inode *inode) stop_handle: ext4_journal_stop(handle); ext4_orphan_del(NULL, inode); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); ext4_xattr_inode_array_free(ea_inode_array); goto no_delete; } @@ -324,7 +332,8 @@ stop_handle: else ext4_free_inode(handle, inode); ext4_journal_stop(handle); - sb_end_intwrite(inode->i_sb); + if (freeze_protected) + sb_end_intwrite(inode->i_sb); ext4_xattr_inode_array_free(ea_inode_array); return; no_delete: @@ -831,8 +840,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, int create = map_flags & EXT4_GET_BLOCKS_CREATE; int err; - J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - || handle != NULL || create == 0); + ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + || handle != NULL || create == 0); map.m_lblk = block; map.m_len = 1; @@ -847,9 +856,9 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, if (unlikely(!bh)) return ERR_PTR(-ENOMEM); if (map.m_flags & EXT4_MAP_NEW) { - J_ASSERT(create != 0); - J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - || (handle != NULL)); + ASSERT(create != 0); + ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + || (handle != NULL)); /* * Now that we do not always journal data, we should @@ -2068,7 +2077,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return -ENOMEM; } - ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); + ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite); ext4_io_submit(&io_submit); /* Drop io_end reference we got from init */ ext4_put_io_end_defer(io_submit.io_end); @@ -2102,7 +2111,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) len = size & ~PAGE_MASK; else len = 
PAGE_SIZE; - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); + err = ext4_bio_write_page(&mpd->io_submit, page, len, false); if (!err) mpd->wbc->nr_to_write--; mpd->first_page++; @@ -4634,7 +4643,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { if (flags & EXT4_IGET_HANDLE) return ERR_PTR(-ESTALE); - __ext4_error(sb, function, line, EFSCORRUPTED, 0, + __ext4_error(sb, function, line, false, EFSCORRUPTED, 0, "inode #%lu: comm %s: iget: illegal inode #", ino, current->comm); return ERR_PTR(-EFSCORRUPTED); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 24af9ed5c3e5..99bf091fee10 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -822,24 +822,6 @@ void ext4_mb_generate_buddy(struct super_block *sb, spin_unlock(&sbi->s_bal_lock); } -static void mb_regenerate_buddy(struct ext4_buddy *e4b) -{ - int count; - int order = 1; - void *buddy; - - while ((buddy = mb_find_buddy(e4b, order++, &count))) { - ext4_set_bits(buddy, 0, count); - } - e4b->bd_info->bb_fragments = 0; - memset(e4b->bd_info->bb_counters, 0, - sizeof(*e4b->bd_info->bb_counters) * - (e4b->bd_sb->s_blocksize_bits + 2)); - - ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, - e4b->bd_bitmap, e4b->bd_group); -} - /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. 
The information involve @@ -1307,22 +1289,18 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) { - int order = 1; - int bb_incr = 1 << (e4b->bd_blkbits - 1); + int order = 1, max; void *bb; BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); - bb = e4b->bd_buddy; while (order <= e4b->bd_blkbits + 1) { - block = block >> 1; - if (!mb_test_bit(block, bb)) { + bb = mb_find_buddy(e4b, order, &max); + if (!mb_test_bit(block >> order, bb)) { /* this block is part of buddy of order 'order' */ return order; } - bb += bb_incr; - bb_incr >>= 1; order++; } return 0; @@ -1512,7 +1490,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); } - mb_regenerate_buddy(e4b); goto done; } @@ -2395,9 +2372,9 @@ repeat: nr = sbi->s_mb_prefetch; if (ext4_has_feature_flex_bg(sb)) { - nr = (group / sbi->s_mb_prefetch) * - sbi->s_mb_prefetch; - nr = nr + sbi->s_mb_prefetch - group; + nr = 1 << sbi->s_log_groups_per_flex; + nr -= group & (nr - 1); + nr = min(nr, sbi->s_mb_prefetch); } prefetch_grp = ext4_mb_prefetch(sb, group, nr, &prefetch_ios); @@ -2733,7 +2710,8 @@ static int ext4_mb_init_backend(struct super_block *sb) if (ext4_has_feature_flex_bg(sb)) { /* a single flex group is supposed to be read by a single IO */ - sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex; + sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex, + BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ } else { sbi->s_mb_prefetch = 32; @@ -5126,6 +5104,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_group_first_block_no(sb, group) + EXT4_C2B(sbi, cluster), "Block already on to-be-freed list"); + kmem_cache_free(ext4_free_data_cachep, new_entry); return 0; } } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 
74bb39c93655..b1f747bcd0d1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -182,10 +182,6 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } -#ifndef assert -#define assert(test) J_ASSERT(test) -#endif - #ifdef DX_DEBUG #define dxtrace(command) command #else @@ -866,7 +862,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, break; } } - assert (at == p - 1); + ASSERT(at == p - 1); } at = p - 1; @@ -1303,8 +1299,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) struct dx_entry *old = frame->at, *new = old + 1; int count = dx_get_count(entries); - assert(count < dx_get_limit(entries)); - assert(old < entries + count); + ASSERT(count < dx_get_limit(entries)); + ASSERT(old < entries + count); memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); dx_set_hash(new, hash); dx_set_block(new, block); @@ -3071,7 +3067,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) * hold i_mutex, or the inode can not be referenced from outside, * so i_nlink should not be bumped due to race */ - J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index defd2e10dfd1..03a44a0de86a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -111,9 +111,6 @@ static void ext4_finish_bio(struct bio *bio) unsigned under_io = 0; unsigned long flags; - if (!page) - continue; - if (fscrypt_is_bounce_page(page)) { bounce_page = page; page = fscrypt_pagecache_page(bounce_page); @@ -438,7 +435,6 @@ submit_and_retry: int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc, bool keep_towrite) { struct page *bounce_page = NULL; @@ -448,6 +444,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, int ret = 0; int nr_submitted = 
0; int nr_to_submit = 0; + struct writeback_control *wbc = io->io_wbc; BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4396e1f72b33..623f69dfc28c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -404,10 +404,8 @@ void ext4_itable_unused_set(struct super_block *sb, bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); } -static void __ext4_update_tstamp(__le32 *lo, __u8 *hi) +static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now) { - time64_t now = ktime_get_real_seconds(); - now = clamp_val(now, 0, (1ull << 40) - 1); *lo = cpu_to_le32(lower_32_bits(now)); @@ -419,108 +417,11 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo); } #define ext4_update_tstamp(es, tstamp) \ - __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) + __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \ + ktime_get_real_seconds()) #define ext4_get_tstamp(es, tstamp) \ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) -static void __save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - int err; - - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - if (bdev_read_only(sb->s_bdev)) - return; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_update_tstamp(es, s_last_error_time); - strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); - es->s_last_error_line = cpu_to_le32(line); - es->s_last_error_ino = cpu_to_le32(ino); - es->s_last_error_block = cpu_to_le64(block); - switch (error) { - case EIO: - err = EXT4_ERR_EIO; - break; - case ENOMEM: - err = EXT4_ERR_ENOMEM; - break; - case EFSBADCRC: - err = EXT4_ERR_EFSBADCRC; - break; - case 0: - case EFSCORRUPTED: - err = EXT4_ERR_EFSCORRUPTED; - break; - case ENOSPC: - err = EXT4_ERR_ENOSPC; - break; - case ENOKEY: - err = EXT4_ERR_ENOKEY; - break; - case EROFS: - err = 
EXT4_ERR_EROFS; - break; - case EFBIG: - err = EXT4_ERR_EFBIG; - break; - case EEXIST: - err = EXT4_ERR_EEXIST; - break; - case ERANGE: - err = EXT4_ERR_ERANGE; - break; - case EOVERFLOW: - err = EXT4_ERR_EOVERFLOW; - break; - case EBUSY: - err = EXT4_ERR_EBUSY; - break; - case ENOTDIR: - err = EXT4_ERR_ENOTDIR; - break; - case ENOTEMPTY: - err = EXT4_ERR_ENOTEMPTY; - break; - case ESHUTDOWN: - err = EXT4_ERR_ESHUTDOWN; - break; - case EFAULT: - err = EXT4_ERR_EFAULT; - break; - default: - err = EXT4_ERR_UNKNOWN; - } - es->s_last_error_errcode = err; - if (!es->s_first_error_time) { - es->s_first_error_time = es->s_last_error_time; - es->s_first_error_time_hi = es->s_last_error_time_hi; - strncpy(es->s_first_error_func, func, - sizeof(es->s_first_error_func)); - es->s_first_error_line = cpu_to_le32(line); - es->s_first_error_ino = es->s_last_error_ino; - es->s_first_error_block = es->s_last_error_block; - es->s_first_error_errcode = es->s_last_error_errcode; - } - /* - * Start the daily error reporting function if it hasn't been - * started already - */ - if (!es->s_error_count) - mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); - le32_add_cpu(&es->s_error_count, 1); -} - -static void save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - __save_error_info(sb, error, ino, block, func, line); - if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb, 1); -} - /* * The del_gendisk() function uninitializes the disk-specific data * structures, including the bdi structure, without telling anyone @@ -649,6 +550,83 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } +struct ext4_err_translation { + int code; + int errno; +}; + +#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err } + +static struct ext4_err_translation err_translation[] = { + EXT4_ERR_TRANSLATE(EIO), + EXT4_ERR_TRANSLATE(ENOMEM), + EXT4_ERR_TRANSLATE(EFSBADCRC), + 
EXT4_ERR_TRANSLATE(EFSCORRUPTED), + EXT4_ERR_TRANSLATE(ENOSPC), + EXT4_ERR_TRANSLATE(ENOKEY), + EXT4_ERR_TRANSLATE(EROFS), + EXT4_ERR_TRANSLATE(EFBIG), + EXT4_ERR_TRANSLATE(EEXIST), + EXT4_ERR_TRANSLATE(ERANGE), + EXT4_ERR_TRANSLATE(EOVERFLOW), + EXT4_ERR_TRANSLATE(EBUSY), + EXT4_ERR_TRANSLATE(ENOTDIR), + EXT4_ERR_TRANSLATE(ENOTEMPTY), + EXT4_ERR_TRANSLATE(ESHUTDOWN), + EXT4_ERR_TRANSLATE(EFAULT), +}; + +static int ext4_errno_to_code(int errno) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(err_translation); i++) + if (err_translation[i].errno == errno) + return err_translation[i].code; + return EXT4_ERR_UNKNOWN; +} + +static void __save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + if (bdev_read_only(sb->s_bdev)) + return; + /* We default to EFSCORRUPTED error... */ + if (error == 0) + error = EFSCORRUPTED; + + spin_lock(&sbi->s_error_lock); + sbi->s_add_error_count++; + sbi->s_last_error_code = error; + sbi->s_last_error_line = line; + sbi->s_last_error_ino = ino; + sbi->s_last_error_block = block; + sbi->s_last_error_func = func; + sbi->s_last_error_time = ktime_get_real_seconds(); + if (!sbi->s_first_error_time) { + sbi->s_first_error_code = error; + sbi->s_first_error_line = line; + sbi->s_first_error_ino = ino; + sbi->s_first_error_block = block; + sbi->s_first_error_func = func; + sbi->s_first_error_time = sbi->s_last_error_time; + } + spin_unlock(&sbi->s_error_lock); +} + +static void save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) +{ + __save_error_info(sb, error, ino, block, func, line); + if (!bdev_read_only(sb->s_bdev)) + ext4_commit_super(sb, 1); +} + /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. 
* @@ -662,40 +640,50 @@ static bool system_going_down(void) * We'll just use the jbd2_journal_abort() error code to record an error in * the journal instead. On recovery, the journal will complain about * that error until we've noted it down and cleared it. + * + * If force_ro is set, we unconditionally force the filesystem into an + * ABORT|READONLY state, unless the error response on the fs has been set to + * panic in which case we take the easy way out and panic immediately. This is + * used to deal with unrecoverable failures such as journal IO errors or ENOMEM + * at a critical moment in log management. */ - -static void ext4_handle_error(struct super_block *sb) +static void ext4_handle_error(struct super_block *sb, bool force_ro) { + journal_t *journal = EXT4_SB(sb)->s_journal; + if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); - if (sb_rdonly(sb)) + if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) return; - if (!test_opt(sb, ERRORS_CONT)) { - journal_t *journal = EXT4_SB(sb)->s_journal; - - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (journal) - jbd2_journal_abort(journal, -EIO); - } + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + if (journal) + jbd2_journal_abort(journal, -EIO); /* * We force ERRORS_RO behavior when system is rebooting. Otherwise we * could panic during 'reboot -f' as the underlying device got already * disabled. 
*/ - if (test_opt(sb, ERRORS_RO) || system_going_down()) { - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); - /* - * Make sure updated value of ->s_mount_flags will be visible - * before ->s_flags update - */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; - } else if (test_opt(sb, ERRORS_PANIC)) { + if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) { panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); } + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); + /* + * Make sure updated value of ->s_mount_flags will be visible before + * ->s_flags update + */ + smp_wmb(); + sb->s_flags |= SB_RDONLY; +} + +static void flush_stashed_error_work(struct work_struct *work) +{ + struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, + s_error_work); + + ext4_commit_super(sbi->s_sb, 1); } #define ext4_error_ratelimit(sb) \ @@ -703,7 +691,7 @@ static void ext4_handle_error(struct super_block *sb) "EXT4-fs error") void __ext4_error(struct super_block *sb, const char *function, - unsigned int line, int error, __u64 block, + unsigned int line, bool force_ro, int error, __u64 block, const char *fmt, ...) 
{ struct va_format vaf; @@ -723,7 +711,7 @@ void __ext4_error(struct super_block *sb, const char *function, va_end(args); } save_error_info(sb, error, 0, block, function, line); - ext4_handle_error(sb); + ext4_handle_error(sb, force_ro); } void __ext4_error_inode(struct inode *inode, const char *function, @@ -755,7 +743,7 @@ void __ext4_error_inode(struct inode *inode, const char *function, } save_error_info(inode->i_sb, error, inode->i_ino, block, function, line); - ext4_handle_error(inode->i_sb); + ext4_handle_error(inode->i_sb, false); } void __ext4_error_file(struct file *file, const char *function, @@ -794,7 +782,7 @@ void __ext4_error_file(struct file *file, const char *function, } save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block, function, line); - ext4_handle_error(inode->i_sb); + ext4_handle_error(inode->i_sb, false); } const char *ext4_decode_error(struct super_block *sb, int errno, @@ -862,51 +850,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, } save_error_info(sb, -errno, 0, 0, function, line); - ext4_handle_error(sb); -} - -/* - * ext4_abort is a much stronger failure handler than ext4_error. The - * abort function may be used to deal with unrecoverable failures such - * as journal IO errors or ENOMEM at a critical moment in log management. - * - * We unconditionally force the filesystem into an ABORT|READONLY state, - * unless the error response on the fs has been set to panic in which - * case we take the easy way out and panic immediately. - */ - -void __ext4_abort(struct super_block *sb, const char *function, - unsigned int line, int error, const char *fmt, ...) 
-{ - struct va_format vaf; - va_list args; - - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) - return; - - save_error_info(sb, error, 0, 0, function, line); - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n", - sb->s_id, function, line, &vaf); - va_end(args); - - if (sb_rdonly(sb) == 0) { - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (EXT4_SB(sb)->s_journal) - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); - - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); - /* - * Make sure updated value of ->s_mount_flags will be visible - * before ->s_flags update - */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; - } - if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) - panic("EXT4-fs panic from previous error\n"); + ext4_handle_error(sb, false); } void __ext4_msg(struct super_block *sb, @@ -982,8 +926,6 @@ __acquires(bitlock) return; trace_ext4_error(sb, function, line); - __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - if (ext4_error_ratelimit(sb)) { va_start(args, fmt); vaf.fmt = fmt; @@ -999,17 +941,16 @@ __acquires(bitlock) va_end(args); } - if (test_opt(sb, WARN_ON_ERROR)) - WARN_ON_ONCE(1); - if (test_opt(sb, ERRORS_CONT)) { - ext4_commit_super(sb, 0); + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); + schedule_work(&EXT4_SB(sb)->s_error_work); return; } - ext4_unlock_group(sb, grp); - ext4_commit_super(sb, 1); - ext4_handle_error(sb); + save_error_info(sb, EFSCORRUPTED, ino, block, function, line); + ext4_handle_error(sb, false); /* * We only get here in the ERRORS_RO case; relocking the group * may be dangerous, but nothing bad will happen since the @@ -1181,6 +1122,7 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quota_off_umount(sb); + flush_work(&sbi->s_error_work); destroy_workqueue(sbi->rsv_conversion_wq); /* @@ -1240,7 +1182,7 @@ static 
void ext4_put_super(struct super_block *sb) * in-memory list had better be clean by this point. */ if (!list_empty(&sbi->s_orphan)) dump_orphan_list(sb, sbi); - J_ASSERT(list_empty(&sbi->s_orphan)); + ASSERT(list_empty(&sbi->s_orphan)); sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); @@ -4005,6 +3947,21 @@ static void ext4_set_resv_clusters(struct super_block *sb) atomic64_set(&sbi->s_resv_clusters, resv_clusters); } +static const char *ext4_quota_mode(struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + if (!ext4_quota_capable(sb)) + return "none"; + + if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb)) + return "journalled"; + else + return "writeback"; +#else + return "disabled"; +#endif +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); @@ -4073,7 +4030,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(bh)) { ext4_msg(sb, KERN_ERR, "unable to read superblock"); ret = PTR_ERR(bh); - bh = NULL; goto out_fail; } /* @@ -4187,19 +4143,26 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } + + blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); if (blocksize == PAGE_SIZE) set_opt(sb, DIOREAD_NOLOCK); - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { - ext4_msg(sb, KERN_ERR, - "Unsupported filesystem 
blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } - if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; @@ -4411,21 +4374,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - if (le32_to_cpu(es->s_log_block_size) > - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log block size: %u", - le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } - if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ext4_msg(sb, KERN_ERR, "Number of reserved GDT blocks insanely large: %d", @@ -4696,7 +4644,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "can't read group descriptor %d", i); db_count = i; ret = PTR_ERR(bh); - bh = NULL; goto failed_mount2; } rcu_read_lock(); @@ -4711,6 +4658,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } timer_setup(&sbi->s_err_report, print_daily_error_info, 0); + spin_lock_init(&sbi->s_error_lock); + INIT_WORK(&sbi->s_error_work, flush_stashed_error_work); /* Register extent status tree shrinker */ if (ext4_es_register_shrinker(sbi)) @@ -4866,6 +4815,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "requested data journaling mode"); goto failed_mount_wq; } + break; default: break; } @@ -4994,13 +4944,11 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); - ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeclusters_counter, block, GFP_KERNEL); if 
(!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); - ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, GFP_KERNEL); } @@ -5080,10 +5028,11 @@ no_journal: if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %.*s%s%s", descr, + "Opts: %.*s%s%s. Quota mode: %s.", descr, (int) sizeof(sbi->s_es->s_mount_opts), sbi->s_es->s_mount_opts, - *sbi->s_es->s_mount_opts ? "; " : "", orig_data); + *sbi->s_es->s_mount_opts ? "; " : "", orig_data, + ext4_quota_mode(sb)); if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -5148,6 +5097,7 @@ failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: del_timer_sync(&sbi->s_err_report); + flush_work(&sbi->s_error_work); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: @@ -5474,6 +5424,7 @@ err_out: static int ext4_commit_super(struct super_block *sb, int sync) { + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; @@ -5505,6 +5456,46 @@ static int ext4_commit_super(struct super_block *sb, int sync) es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); + /* Copy error information to the on-disk superblock */ + spin_lock(&sbi->s_error_lock); + if (sbi->s_add_error_count > 0) { + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + if (!es->s_first_error_time && !es->s_first_error_time_hi) { + __ext4_update_tstamp(&es->s_first_error_time, + &es->s_first_error_time_hi, + sbi->s_first_error_time); + strncpy(es->s_first_error_func, sbi->s_first_error_func, + sizeof(es->s_first_error_func)); + es->s_first_error_line = + cpu_to_le32(sbi->s_first_error_line); + es->s_first_error_ino = + cpu_to_le32(sbi->s_first_error_ino); + es->s_first_error_block = + 
cpu_to_le64(sbi->s_first_error_block); + es->s_first_error_errcode = + ext4_errno_to_code(sbi->s_first_error_code); + } + __ext4_update_tstamp(&es->s_last_error_time, + &es->s_last_error_time_hi, + sbi->s_last_error_time); + strncpy(es->s_last_error_func, sbi->s_last_error_func, + sizeof(es->s_last_error_func)); + es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line); + es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino); + es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block); + es->s_last_error_errcode = + ext4_errno_to_code(sbi->s_last_error_code); + /* + * Start the daily error reporting function if it hasn't been + * started already + */ + if (!es->s_error_count) + mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); + le32_add_cpu(&es->s_error_count, sbi->s_add_error_count); + sbi->s_add_error_count = 0; + } + spin_unlock(&sbi->s_error_lock); + BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); if (sync) @@ -5858,6 +5849,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } + /* Flush outstanding errors before changing fs state */ + flush_work(&sbi->s_error_work); + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { err = -EROFS; @@ -6016,7 +6010,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) */ *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); - ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); + ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.", + orig_data, ext4_quota_mode(sb)); kfree(orig_data); return 0; @@ -6195,11 +6190,8 @@ static int ext4_release_dquot(struct dquot *dquot) static int ext4_mark_dquot_dirty(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - /* Are we journaling quotas? 
*/ - if (ext4_has_feature_quota(sb) || - sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + if (ext4_is_quota_journalled(sb)) { dquot_mark_dquot_dirty(dquot); return ext4_write_dquot(dquot); } else { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 6127e94ea4f5..4e3b1f8c2e81 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1927,7 +1927,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } else { /* Allocate a buffer where we construct the new block. */ s->base = kzalloc(sb->s_blocksize, GFP_NOFS); - /* assert(header == s->base) */ error = -ENOMEM; if (s->base == NULL) goto cleanup; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index c070c0d8e3e9..aea35459d390 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -315,7 +315,7 @@ retry: if (mode & FMODE_WRITE) r = w = 1; - name = dentry_name(file->f_path.dentry); + name = dentry_name(d_real(file->f_path.dentry, file->f_inode)); if (name == NULL) return -ENOMEM; diff --git a/fs/inode.c b/fs/inode.c index cb008acf0efd..6442d97d9a4a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1624,7 +1624,9 @@ static void iput_final(struct inode *inode) else drop = generic_drop_inode(inode); - if (!drop && (sb->s_flags & SB_ACTIVE)) { + if (!drop && + !(inode->i_state & I_DONTCACHE) && + (sb->s_flags & SB_ACTIVE)) { inode_add_lru(inode); spin_unlock(&inode->i_lock); return; diff --git a/fs/io-wq.c b/fs/io-wq.c index f72d53848dcb..a564f36e260c 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -36,8 +36,7 @@ enum { enum { IO_WQ_BIT_EXIT = 0, /* wq exiting */ - IO_WQ_BIT_CANCEL = 1, /* cancel work on list */ - IO_WQ_BIT_ERROR = 2, /* error on setup */ + IO_WQ_BIT_ERROR = 1, /* error on setup */ }; enum { @@ -561,12 +560,6 @@ get_next: next_hashed = wq_next_work(work); io_impersonate_work(worker, work); - /* - * OK to set IO_WQ_WORK_CANCEL even for uncancellable - * work, the worker function will do the right thing. 
- */ - if (test_bit(IO_WQ_BIT_CANCEL, &wq->state)) - work->flags |= IO_WQ_WORK_CANCEL; old_work = work; linked = wq->do_work(work); @@ -732,12 +725,6 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) return acct->nr_workers < acct->max_workers; } -static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data) -{ - send_sig(SIGINT, worker->task, 1); - return false; -} - /* * Iterate the passed in list and call the specific function for each * worker that isn't exiting @@ -938,21 +925,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val) work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); } -void io_wq_cancel_all(struct io_wq *wq) -{ - int node; - - set_bit(IO_WQ_BIT_CANCEL, &wq->state); - - rcu_read_lock(); - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL); - } - rcu_read_unlock(); -} - struct io_cb_cancel_data { work_cancel_fn *fn; void *data; diff --git a/fs/io-wq.h b/fs/io-wq.h index 069496c6d4f9..b158f8addcf3 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -59,6 +59,7 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node, list->last->next = node; list->last = node; } + node->next = NULL; } static inline void wq_list_cut(struct io_wq_work_list *list, @@ -128,8 +129,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work) return work->flags & IO_WQ_WORK_HASHED; } -void io_wq_cancel_all(struct io_wq *wq); - typedef bool (work_cancel_fn)(struct io_wq_work *, void *); enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, diff --git a/fs/io_uring.c b/fs/io_uring.c index 6f9392c35eef..7e35283fc0b1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1693,6 +1693,11 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) return io_wq_current_is_worker(); } +static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx) +{ + return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); +} + 
static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { if (waitqueue_active(&ctx->wait)) @@ -1703,15 +1708,6 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) eventfd_signal(ctx->cq_ev_fd, 1); } -static void io_cqring_mark_overflow(struct io_ring_ctx *ctx) -{ - if (list_empty(&ctx->cq_overflow_list)) { - clear_bit(0, &ctx->sq_check_overflow); - clear_bit(0, &ctx->cq_check_overflow); - ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; - } -} - /* Returns true if there are no backlogged entries after the flush */ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, @@ -1721,23 +1717,13 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct io_kiocb *req, *tmp; struct io_uring_cqe *cqe; unsigned long flags; + bool all_flushed; LIST_HEAD(list); - if (!force) { - if (list_empty_careful(&ctx->cq_overflow_list)) - return true; - if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) == - rings->cq_ring_entries)) - return false; - } + if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries) + return false; spin_lock_irqsave(&ctx->completion_lock, flags); - - /* if force is set, the ring is going away. 
always drop after that */ - if (force) - ctx->cq_overflow_flushed = 1; - - cqe = NULL; list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { if (!io_match_task(req, tsk, files)) continue; @@ -1758,9 +1744,14 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, } } - io_commit_cqring(ctx); - io_cqring_mark_overflow(ctx); + all_flushed = list_empty(&ctx->cq_overflow_list); + if (all_flushed) { + clear_bit(0, &ctx->sq_check_overflow); + clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; + } + io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); @@ -1770,7 +1761,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, io_put_req(req); } - return cqe != NULL; + return all_flushed; } static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) @@ -2320,8 +2311,6 @@ static void io_double_put_req(struct io_kiocb *req) static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) { - struct io_rings *rings = ctx->rings; - if (test_bit(0, &ctx->cq_check_overflow)) { /* * noflush == true is from the waitqueue handler, just ensure @@ -2336,7 +2325,7 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) /* See comment at the top of this file */ smp_rmb(); - return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); + return __io_cqring_events(ctx); } static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) @@ -3136,9 +3125,7 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, iov[0].iov_len = kbuf->len; return 0; } - if (!req->rw.len) - return 0; - else if (req->rw.len > 1) + if (req->rw.len != 1) return -EINVAL; #ifdef CONFIG_COMPAT @@ -3784,6 +3771,8 @@ static int io_shutdown(struct io_kiocb *req, bool force_nonblock) return -ENOTSOCK; ret = __sys_shutdown_sock(sock, req->shutdown.how); + if (ret < 0) + req_set_fail_links(req); 
io_req_complete(req, ret); return 0; #else @@ -6107,15 +6096,15 @@ static void io_req_drop_files(struct io_kiocb *req) struct io_uring_task *tctx = req->task->io_uring; unsigned long flags; - spin_lock_irqsave(&ctx->inflight_lock, flags); - list_del(&req->inflight_entry); - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); - spin_unlock_irqrestore(&ctx->inflight_lock, flags); - req->flags &= ~REQ_F_INFLIGHT; put_files_struct(req->work.identity->files); put_nsproxy(req->work.identity->nsproxy); + spin_lock_irqsave(&ctx->inflight_lock, flags); + list_del(&req->inflight_entry); + spin_unlock_irqrestore(&ctx->inflight_lock, flags); + req->flags &= ~REQ_F_INFLIGHT; req->work.flags &= ~IO_WQ_WORK_FILES; + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); } static void __io_clean_op(struct io_kiocb *req) @@ -6343,19 +6332,28 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) } if (ret) { - /* - * io_iopoll_complete() does not hold completion_lock to complete - * polled io, so here for polled io, just mark it done and still let - * io_iopoll_complete() complete it. - */ - if (req->ctx->flags & IORING_SETUP_IOPOLL) { - struct kiocb *kiocb = &req->rw.kiocb; + struct io_ring_ctx *lock_ctx = NULL; - kiocb_done(kiocb, ret, NULL); - } else { - req_set_fail_links(req); - io_req_complete(req, ret); - } + if (req->ctx->flags & IORING_SETUP_IOPOLL) + lock_ctx = req->ctx; + + /* + * io_iopoll_complete() does not hold completion_lock to + * complete polled io, so here for polled io, we can not call + * io_req_complete() directly, otherwise there may be concurrent + * access to cqring, defer_list, etc, which is not safe. Given + * that io_iopoll_complete() is always called under uring_lock, + * so here for polled io, we also get uring_lock to complete + * it.
+ */ + if (lock_ctx) + mutex_lock(&lock_ctx->uring_lock); + + req_set_fail_links(req); + io_req_complete(req, ret); + + if (lock_ctx) + mutex_unlock(&lock_ctx->uring_lock); } return io_steal_work(req); @@ -6824,8 +6822,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) /* if we have a backlog and couldn't flush it all, return BUSY */ if (test_bit(0, &ctx->sq_check_overflow)) { - if (!list_empty(&ctx->cq_overflow_list) && - !io_cqring_overflow_flush(ctx, false, NULL, NULL)) + if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) return -EBUSY; } @@ -8155,10 +8152,13 @@ static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages, __io_unaccount_mem(ctx->user, nr_pages); if (ctx->mm_account) { - if (acct == ACCT_LOCKED) + if (acct == ACCT_LOCKED) { + mmap_write_lock(ctx->mm_account); ctx->mm_account->locked_vm -= nr_pages; - else if (acct == ACCT_PINNED) + mmap_write_unlock(ctx->mm_account); + }else if (acct == ACCT_PINNED) { atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); + } } } @@ -8174,10 +8174,13 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages, } if (ctx->mm_account) { - if (acct == ACCT_LOCKED) + if (acct == ACCT_LOCKED) { + mmap_write_lock(ctx->mm_account); ctx->mm_account->locked_vm += nr_pages; - else if (acct == ACCT_PINNED) + mmap_write_unlock(ctx->mm_account); + } else if (acct == ACCT_PINNED) { atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); + } } return 0; @@ -8643,10 +8646,19 @@ static void io_ring_exit_work(struct work_struct *work) io_ring_ctx_free(ctx); } +static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + + return req->ctx == data; +} + static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); + /* if force is set, the ring is going away. 
always drop after that */ + ctx->cq_overflow_flushed = 1; if (ctx->rings) io_cqring_overflow_flush(ctx, true, NULL, NULL); mutex_unlock(&ctx->uring_lock); @@ -8655,7 +8667,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_poll_remove_all(ctx, NULL, NULL); if (ctx->io_wq) - io_wq_cancel_all(ctx->io_wq); + io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true); /* if we failed setting up the ctx, we might not have any rings */ io_iopoll_try_reap_events(ctx); @@ -8798,9 +8810,9 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, ret |= io_poll_remove_all(ctx, task, NULL); ret |= io_kill_timeouts(ctx, task, NULL); + ret |= io_run_task_work(); if (!ret) break; - io_run_task_work(); cond_resched(); } } @@ -8849,10 +8861,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) { struct io_uring_task *tctx = current->io_uring; + int ret; if (unlikely(!tctx)) { - int ret; - ret = io_uring_alloc_task_context(current); if (unlikely(ret)) return ret; @@ -8863,7 +8874,12 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) if (!old) { get_file(file); - xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL); + ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, + file, GFP_KERNEL)); + if (ret) { + fput(file); + return ret; + } } tctx->last = file; } @@ -8986,9 +9002,9 @@ void __io_uring_task_cancel(void) if (inflight != tctx_inflight(tctx)) continue; schedule(); + finish_wait(&tctx->wait, &wait); } while (1); - finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); } @@ -9156,10 +9172,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { - io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); - if (!list_empty_careful(&ctx->cq_overflow_list)) + if (!list_empty_careful(&ctx->cq_overflow_list)) { + bool needs_lock = ctx->flags & 
IORING_SETUP_IOPOLL; + + io_ring_submit_lock(ctx, needs_lock); io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); + io_ring_submit_unlock(ctx, needs_lock); + } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) @@ -9369,55 +9388,52 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, return 0; } +static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) +{ + int ret, fd; + + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) + return fd; + + ret = io_uring_add_task_file(ctx, file); + if (ret) { + put_unused_fd(fd); + return ret; + } + fd_install(fd, file); + return fd; +} + /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, * we have to tie this fd to a socket for file garbage collection purposes. 
*/ -static int io_uring_get_fd(struct io_ring_ctx *ctx) +static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { struct file *file; - int ret; - int fd; - #if defined(CONFIG_UNIX) + int ret; + ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, &ctx->ring_sock); if (ret) - return ret; + return ERR_PTR(ret); #endif - ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC); - if (ret < 0) - goto err; - fd = ret; - file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC); +#if defined(CONFIG_UNIX) if (IS_ERR(file)) { - put_unused_fd(fd); - ret = PTR_ERR(file); - goto err; + sock_release(ctx->ring_sock); + ctx->ring_sock = NULL; + } else { + ctx->ring_sock->file = file; } - -#if defined(CONFIG_UNIX) - ctx->ring_sock->file = file; #endif - ret = io_uring_add_task_file(ctx, file); - if (ret) { - fput(file); - put_unused_fd(fd); - goto err; - } - fd_install(fd, file); - return fd; -err: -#if defined(CONFIG_UNIX) - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; -#endif - return ret; + return file; } static int io_uring_create(unsigned entries, struct io_uring_params *p, @@ -9425,6 +9441,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, { struct user_struct *user = NULL; struct io_ring_ctx *ctx; + struct file *file; bool limit_mem; int ret; @@ -9572,13 +9589,22 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, goto err; } + file = io_uring_get_file(ctx); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err; + } + /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup */ - ret = io_uring_get_fd(ctx); - if (ret < 0) - goto err; + ret = io_uring_install_fd(ctx, file); + if (ret < 0) { + /* fput will clean it up */ + fput(file); + return ret; + } trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 188f79d76988..2dc944442802 100644 --- 
a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1869,9 +1869,7 @@ static int load_superblock(journal_t *journal) if (jbd2_has_feature_fast_commit(journal)) { journal->j_fc_last = be32_to_cpu(sb->s_maxlen); - num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks); - if (!num_fc_blocks) - num_fc_blocks = JBD2_MIN_FC_BLOCKS; + num_fc_blocks = jbd2_journal_get_num_fc_blks(sb); if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) journal->j_last = journal->j_fc_last - num_fc_blocks; journal->j_fc_first = journal->j_last + 1; @@ -2102,9 +2100,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal) journal_superblock_t *sb = journal->j_superblock; unsigned long long num_fc_blks; - num_fc_blks = be32_to_cpu(sb->s_num_fc_blks); - if (num_fc_blks == 0) - num_fc_blks = JBD2_MIN_FC_BLOCKS; + num_fc_blks = jbd2_journal_get_num_fc_blks(sb); if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS) return -ENOSPC; diff --git a/fs/namei.c b/fs/namei.c index d1c1d23c44c9..44bc01aac801 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2198,8 +2198,10 @@ static int link_path_walk(const char *name, struct nameidata *nd) return PTR_ERR(name); while (*name=='/') name++; - if (!*name) + if (!*name) { + nd->dir_mode = 0; // short-circuit the 'hardening' idiocy return 0; + } /* At this point we know we have a real path component. 
*/ for(;;) { diff --git a/fs/namespace.c b/fs/namespace.c index 2b681f65ca04..d2db7dfe232b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -156,10 +156,10 @@ static inline void mnt_add_count(struct mount *mnt, int n) /* * vfsmount lock must be held for write */ -unsigned int mnt_get_count(struct mount *mnt) +int mnt_get_count(struct mount *mnt) { #ifdef CONFIG_SMP - unsigned int count = 0; + int count = 0; int cpu; for_each_possible_cpu(cpu) { @@ -1139,6 +1139,7 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { LIST_HEAD(list); + int count; rcu_read_lock(); if (likely(READ_ONCE(mnt->mnt_ns))) { @@ -1162,7 +1163,9 @@ static void mntput_no_expire(struct mount *mnt) */ smp_mb(); mnt_add_count(mnt, -1); - if (mnt_get_count(mnt)) { + count = mnt_get_count(mnt); + if (count != 0) { + WARN_ON(count < 0); rcu_read_unlock(); unlock_mount_hash(); return; diff --git a/fs/pnode.h b/fs/pnode.h index 49a058c73e4c..26f74e092bd9 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -44,7 +44,7 @@ int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); -unsigned int mnt_get_count(struct mount *mnt); +int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, diff --git a/include/linux/efi.h b/include/linux/efi.h index d7c0e73af2b9..763b816ba19c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -817,12 +817,6 @@ static inline bool efi_enabled(int feature) static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} -static inline bool -efi_capsule_pending(int *reset_type) -{ - return false; -} - static inline bool efi_soft_reserve_enabled(void) { return false; @@ -1038,6 +1032,7 @@ bool efivar_validate(efi_guid_t 
vendor, efi_char16_t *var_name, u8 *data, bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, size_t len); +#if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER) extern bool efi_capsule_pending(int *reset_type); extern int efi_capsule_supported(efi_guid_t guid, u32 flags, @@ -1045,6 +1040,9 @@ extern int efi_capsule_supported(efi_guid_t guid, u32 flags, extern int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages); +#else +static inline bool efi_capsule_pending(int *reset_type) { return false; } +#endif #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); @@ -1089,7 +1087,28 @@ enum efi_secureboot_mode { efi_secureboot_mode_disabled, efi_secureboot_mode_enabled, }; -enum efi_secureboot_mode efi_get_secureboot(void); + +static inline +enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) +{ + u8 secboot, setupmode = 0; + efi_status_t status; + unsigned long size; + + size = sizeof(secboot); + status = get_var(L"SecureBoot", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, + &secboot); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; + if (status != EFI_SUCCESS) + return efi_secureboot_mode_unknown; + + size = sizeof(setupmode); + get_var(L"SetupMode", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, &setupmode); + if (secboot == 0 || setupmode == 1) + return efi_secureboot_mode_disabled; + return efi_secureboot_mode_enabled; +} #ifdef CONFIG_RESET_ATTACK_MITIGATION void efi_enable_reset_attack_mitigation(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index ad4cf1bae586..fd47deea7c17 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2876,8 +2876,7 @@ extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { - return !inode->i_nlink || inode_unhashed(inode) || - (inode->i_state & I_DONTCACHE); + return !inode->i_nlink || inode_unhashed(inode); } extern void 
d_mark_dontcache(struct inode *inode); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 870b3251e174..bb8ff9083e7d 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -232,6 +232,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); # define local_irq_enable_in_hardirq() local_irq_enable() #endif +bool irq_has_action(unsigned int irq); extern void disable_irq_nosync(unsigned int irq); extern bool disable_hardirq(unsigned int irq); extern void disable_irq(unsigned int irq); diff --git a/include/linux/irq.h b/include/linux/irq.h index c332871d59da..4aeb1c4c7e07 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -906,6 +906,13 @@ struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) } #endif +static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + + return d ? irq_data_get_effective_affinity_mask(d) : NULL; +} + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 5745491303e0..891b323266df 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -113,6 +113,12 @@ static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif +static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, + unsigned int cpu) +{ + return desc->kstat_irqs ? 
*per_cpu_ptr(desc->kstat_irqs, cpu) : 0; +} + static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { return container_of(data->common, struct irq_desc, irq_common_data); @@ -179,12 +185,7 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, /* Test to see if a driver has successfully requested an irq */ static inline int irq_desc_has_action(struct irq_desc *desc) { - return desc->action != NULL; -} - -static inline int irq_has_action(unsigned int irq) -{ - return irq_desc_has_action(irq_to_desc(irq)); + return desc && desc->action != NULL; } /** @@ -228,40 +229,31 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, data->chip = chip; } +bool irq_check_status_bit(unsigned int irq, unsigned int bitmask); + static inline bool irq_balancing_disabled(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; + return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK); } static inline bool irq_is_percpu(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_PER_CPU; + return irq_check_status_bit(irq, IRQ_PER_CPU); } static inline bool irq_is_percpu_devid(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_PER_CPU_DEVID; + return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } +void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class); static inline void irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class) { - struct irq_desc *desc = irq_to_desc(irq); - - if (desc) { - lockdep_set_class(&desc->lock, lock_class); - lockdep_set_class(&desc->request_mutex, request_class); - } + if (IS_ENABLED(CONFIG_LOCKDEP)) + __irq_set_lockdep_class(irq, lock_class, request_class); } #endif diff --git 
a/include/linux/jbd2.h b/include/linux/jbd2.h index 578ff196b3ce..99d3cd051ac3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,7 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 -#define JBD2_MIN_FC_BLOCKS 256 +#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256 #ifdef __KERNEL__ @@ -538,6 +538,7 @@ struct transaction_chp_stats_s { * The transaction keeps track of all of the buffers modified by a * running transaction, and all of the buffers committed but not yet * flushed to home for finished transactions. + * (Locking Documentation improved by LockDoc) */ /* @@ -658,12 +659,12 @@ struct transaction_s unsigned long t_start; /* - * When commit was requested + * When commit was requested [j_state_lock] */ unsigned long t_requested; /* - * Checkpointing stats [j_checkpoint_sem] + * Checkpointing stats [j_list_lock] */ struct transaction_chp_stats_s t_chp_stats; @@ -1691,6 +1692,13 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) return journal->j_chksum_driver != NULL; } +static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) +{ + int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks); + + return num_fc_blocks ? num_fc_blocks : JBD2_DEFAULT_FAST_COMMIT_BLOCKS; +} + /* * Return number of free blocks in the log. Must be called under j_state_lock. 
*/ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 89f0745c096d..44ae1a7eb9e3 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -67,7 +67,6 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ -extern unsigned int kstat_irqs(unsigned int irq); extern unsigned int kstat_irqs_usr(unsigned int irq); /* diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index a3a9a878415f..86376779ab31 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1284,6 +1284,8 @@ enum ec_feature_code { EC_FEATURE_SCP = 39, /* The MCU is an Integrated Sensor Hub */ EC_FEATURE_ISH = 40, + /* New TCPMv2 TYPEC_ prefaced commands supported */ + EC_FEATURE_TYPEC_CMD = 41, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) @@ -5528,6 +5530,159 @@ struct ec_response_regulator_get_voltage { uint32_t voltage_mv; } __ec_align4; +/* + * Gather all discovery information for the given port and partner type. + * + * Note that if discovery has not yet completed, only the currently completed + * responses will be filled in. If the discovery data structures are changed + * in the process of the command running, BUSY will be returned. + * + * VDO field sizes are set to the maximum possible number of VDOs a VDM may + * contain, while the number of SVIDs here is selected to fit within the PROTO2 + * maximum parameter size. 
+ */ +#define EC_CMD_TYPEC_DISCOVERY 0x0131 + +enum typec_partner_type { + TYPEC_PARTNER_SOP = 0, + TYPEC_PARTNER_SOP_PRIME = 1, +}; + +struct ec_params_typec_discovery { + uint8_t port; + uint8_t partner_type; /* enum typec_partner_type */ +} __ec_align1; + +struct svid_mode_info { + uint16_t svid; + uint16_t mode_count; /* Number of modes partner sent */ + uint32_t mode_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ +}; + +struct ec_response_typec_discovery { + uint8_t identity_count; /* Number of identity VDOs partner sent */ + uint8_t svid_count; /* Number of SVIDs partner sent */ + uint16_t reserved; + uint32_t discovery_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ + struct svid_mode_info svids[0]; +} __ec_align1; + +/* + * Gather all status information for a port. + * + * Note: this covers many of the return fields from the deprecated + * EC_CMD_USB_PD_CONTROL command, except those that are redundant with the + * discovery data. The "enum pd_cc_states" is defined with the deprecated + * EC_CMD_USB_PD_CONTROL command. + * + * This also combines in the EC_CMD_USB_PD_MUX_INFO flags. + */ +#define EC_CMD_TYPEC_STATUS 0x0133 + +/* + * Power role. + * + * Note this is also used for PD header creation, and values align to those in + * the Power Delivery Specification Revision 3.0 (See + * 6.2.1.1.4 Port Power Role). + */ +enum pd_power_role { + PD_ROLE_SINK = 0, + PD_ROLE_SOURCE = 1 +}; + +/* + * Data role. + * + * Note this is also used for PD header creation, and the first two values + * align to those in the Power Delivery Specification Revision 3.0 (See + * 6.2.1.1.6 Port Data Role). + */ +enum pd_data_role { + PD_ROLE_UFP = 0, + PD_ROLE_DFP = 1, + PD_ROLE_DISCONNECTED = 2, +}; + +enum pd_vconn_role { + PD_ROLE_VCONN_OFF = 0, + PD_ROLE_VCONN_SRC = 1, +}; + +/* + * Note: BIT(0) may be used to determine whether the polarity is CC1 or CC2, + * regardless of whether a debug accessory is connected. 
+ */ +enum tcpc_cc_polarity { + /* + * _CCx: is used to indicate the polarity while not connected to + * a Debug Accessory. Only one CC line will assert a resistor and + * the other will be open. + */ + POLARITY_CC1 = 0, + POLARITY_CC2 = 1, + + /* + * _CCx_DTS is used to indicate the polarity while connected to a + * SRC Debug Accessory. Assert resistors on both lines. + */ + POLARITY_CC1_DTS = 2, + POLARITY_CC2_DTS = 3, + + /* + * The current TCPC code relies on these specific POLARITY values. + * Adding in a check to verify if the list grows for any reason + * that this will give a hint that other places need to be + * adjusted. + */ + POLARITY_COUNT +}; + +#define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) +#define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) + +struct ec_params_typec_status { + uint8_t port; +} __ec_align1; + +struct ec_response_typec_status { + uint8_t pd_enabled; /* PD communication enabled - bool */ + uint8_t dev_connected; /* Device connected - bool */ + uint8_t sop_connected; /* Device is SOP PD capable - bool */ + uint8_t source_cap_count; /* Number of Source Cap PDOs */ + + uint8_t power_role; /* enum pd_power_role */ + uint8_t data_role; /* enum pd_data_role */ + uint8_t vconn_role; /* enum pd_vconn_role */ + uint8_t sink_cap_count; /* Number of Sink Cap PDOs */ + + uint8_t polarity; /* enum tcpc_cc_polarity */ + uint8_t cc_state; /* enum pd_cc_states */ + uint8_t dp_pin; /* DP pin mode (MODE_DP_IN_[A-E]) */ + uint8_t mux_state; /* USB_PD_MUX* - encoded mux state */ + + char tc_state[32]; /* TC state name */ + + uint32_t events; /* PD_STATUS_EVENT bitmask */ + + /* + * BCD PD revisions for partners + * + * The format has the PD major revision in the upper nibble, and PD + * minor version in the next nibble. Following two nibbles are + * currently 0. + * ex. PD 3.2 would map to 0x3200 + * + * PD major/minor will be 0 if no PD device is connected.
+ */ + uint16_t sop_revision; + uint16_t sop_prime_revision; + + uint32_t source_cap_pdos[7]; /* Max 7 PDOs can be present */ + + uint32_t sink_cap_pdos[7]; /* Max 7 PDOs can be present */ +} __ec_align1; + /*****************************************************************************/ /* The command range 0x200-0x2FF is reserved for Rotor. */ diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 30bc7a7223bb..0fefeb976877 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -42,6 +42,7 @@ struct vdpa_vq_state { * @config: the configuration ops for this device. * @index: device index * @features_valid: were features initialized? for legacy guests + * @nvqs: maximum number of supported virtqueues */ struct vdpa_device { struct device dev; diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index 0b6869980ba2..e282ce02fa2d 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -11,7 +11,7 @@ struct ioc_gq; #include -TRACE_EVENT(iocost_iocg_activate, +DECLARE_EVENT_CLASS(iocost_iocg_state, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u64 last_period, u64 cur_period, u64 vtime), @@ -59,6 +59,20 @@ TRACE_EVENT(iocost_iocg_activate, ) ); +DEFINE_EVENT(iocost_iocg_state, iocost_iocg_activate, + TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, + u64 last_period, u64 cur_period, u64 vtime), + + TP_ARGS(iocg, path, now, last_period, cur_period, vtime) +); + +DEFINE_EVENT(iocost_iocg_state, iocost_iocg_idle, + TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, + u64 last_period, u64 cur_period, u64 vtime), + + TP_ARGS(iocg, path, now, last_period, cur_period, vtime) +); + DECLARE_EVENT_CLASS(iocg_inuse_update, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index b052355ac7a3..bc1c0621f5ed 100644 --- a/include/uapi/linux/virtio_ids.h +++ 
b/include/uapi/linux/virtio_ids.h @@ -29,24 +29,30 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#define VIRTIO_ID_NET 1 /* virtio net */ -#define VIRTIO_ID_BLOCK 2 /* virtio block */ -#define VIRTIO_ID_CONSOLE 3 /* virtio console */ -#define VIRTIO_ID_RNG 4 /* virtio rng */ -#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ -#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ -#define VIRTIO_ID_SCSI 8 /* virtio scsi */ -#define VIRTIO_ID_9P 9 /* 9p virtio console */ -#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ -#define VIRTIO_ID_CAIF 12 /* Virtio caif */ -#define VIRTIO_ID_GPU 16 /* virtio GPU */ -#define VIRTIO_ID_INPUT 18 /* virtio input */ -#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ -#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ -#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ -#define VIRTIO_ID_MEM 24 /* virtio mem */ -#define VIRTIO_ID_FS 26 /* virtio filesystem */ -#define VIRTIO_ID_PMEM 27 /* virtio pmem */ -#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_NET 1 /* virtio net */ +#define VIRTIO_ID_BLOCK 2 /* virtio block */ +#define VIRTIO_ID_CONSOLE 3 /* virtio console */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ +#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */ +#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ +#define VIRTIO_ID_SCSI 8 /* virtio scsi */ +#define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ +#define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */ +#define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */ +#define VIRTIO_ID_INPUT 18 /* virtio input */ +#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ +#define VIRTIO_ID_CRYPTO 
20 /* virtio crypto */ +#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */ +#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ +#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_FS 26 /* virtio filesystem */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index e810eb9906ea..cc1a09406c6e 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -147,12 +147,12 @@ static ssize_t per_cpu_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - int cpu, irq = desc->irq_data.irq; ssize_t ret = 0; char *p = ""; + int cpu; for_each_possible_cpu(cpu) { - unsigned int c = kstat_irqs_cpu(irq, cpu); + unsigned int c = irq_desc_kstat_cpu(desc, cpu); ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c); p = ","; @@ -352,7 +352,9 @@ struct irq_desc *irq_to_desc(unsigned int irq) { return radix_tree_lookup(&irq_desc_tree, irq); } -EXPORT_SYMBOL(irq_to_desc); +#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE +EXPORT_SYMBOL_GPL(irq_to_desc); +#endif static void delete_irq_desc(unsigned int irq) { @@ -924,15 +926,7 @@ static bool irq_is_nmi(struct irq_desc *desc) return desc->istate & IRQS_NMI; } -/** - * kstat_irqs - Get the statistics for an interrupt - * @irq: The interrupt number - * - * Returns the sum of interrupt counts on all cpus since boot for - * @irq. The caller must ensure that the interrupt is not removed - * concurrently. 
- */ -unsigned int kstat_irqs(unsigned int irq) +static unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned int sum = 0; @@ -943,21 +937,22 @@ unsigned int kstat_irqs(unsigned int irq) if (!irq_settings_is_per_cpu_devid(desc) && !irq_settings_is_per_cpu(desc) && !irq_is_nmi(desc)) - return desc->tot_count; + return data_race(desc->tot_count); for_each_possible_cpu(cpu) - sum += *per_cpu_ptr(desc->kstat_irqs, cpu); + sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu)); return sum; } /** - * kstat_irqs_usr - Get the statistics for an interrupt + * kstat_irqs_usr - Get the statistics for an interrupt from thread context * @irq: The interrupt number * * Returns the sum of interrupt counts on all cpus since boot for @irq. - * Contrary to kstat_irqs() this can be called from any context. - * It uses rcu since a concurrent removal of an interrupt descriptor is - * observing an rcu grace period before delayed_free_desc()/irq_kobj_release(). + * + * It uses rcu to protect the access since a concurrent removal of an + * interrupt descriptor is observing an rcu grace period before + * delayed_free_desc()/irq_kobj_release(). 
*/ unsigned int kstat_irqs_usr(unsigned int irq) { @@ -968,3 +963,17 @@ unsigned int kstat_irqs_usr(unsigned int irq) rcu_read_unlock(); return sum; } + +#ifdef CONFIG_LOCKDEP +void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } +} +EXPORT_SYMBOL_GPL(__irq_set_lockdep_class); +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c826ba4141fe..ab8567f32501 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2822,3 +2822,40 @@ out_unlock: return err; } EXPORT_SYMBOL_GPL(irq_set_irqchip_state); + +/** + * irq_has_action - Check whether an interrupt is requested + * @irq: The linux irq number + * + * Returns: A snapshot of the current state + */ +bool irq_has_action(unsigned int irq) +{ + bool res; + + rcu_read_lock(); + res = irq_desc_has_action(irq_to_desc(irq)); + rcu_read_unlock(); + return res; +} +EXPORT_SYMBOL_GPL(irq_has_action); + +/** + * irq_check_status_bit - Check whether bits in the irq descriptor status are set + * @irq: The linux irq number + * @bitmask: The bitmask to evaluate + * + * Returns: True if one of the bits in @bitmask is set + */ +bool irq_check_status_bit(unsigned int irq, unsigned int bitmask) +{ + struct irq_desc *desc; + bool res = false; + + rcu_read_lock(); + desc = irq_to_desc(irq); + if (desc) + res = !!(desc->status_use_accessors & bitmask); + rcu_read_unlock(); + return res; +} diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 72513ed2a5fc..98138788cb04 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -488,9 +488,10 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc || irq_settings_is_hidden(desc)) goto outsparse; - if (desc->kstat_irqs) + if (desc->kstat_irqs) { for_each_online_cpu(j) - any_count |= *per_cpu_ptr(desc->kstat_irqs, j); + 
any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j)); + } if ((!desc->action || irq_desc_is_chained(desc)) && !any_count) goto outsparse; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 71b75db3e8eb..ff74120029fc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4033,15 +4033,20 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) } } +static void balance_push(struct rq *rq); + +struct callback_head balance_push_callback = { + .next = NULL, + .func = (void (*)(struct callback_head *))balance_push, +}; + static inline struct callback_head *splice_balance_callbacks(struct rq *rq) { struct callback_head *head = rq->balance_callback; lockdep_assert_held(&rq->lock); - if (head) { + if (head) rq->balance_callback = NULL; - rq->balance_flags &= ~BALANCE_WORK; - } return head; } @@ -4062,21 +4067,6 @@ static inline void balance_callbacks(struct rq *rq, struct callback_head *head) } } -static void balance_push(struct rq *rq); - -static inline void balance_switch(struct rq *rq) -{ - if (likely(!rq->balance_flags)) - return; - - if (rq->balance_flags & BALANCE_PUSH) { - balance_push(rq); - return; - } - - __balance_callbacks(rq); -} - #else static inline void __balance_callbacks(struct rq *rq) @@ -4092,10 +4082,6 @@ static inline void balance_callbacks(struct rq *rq, struct callback_head *head) { } -static inline void balance_switch(struct rq *rq) -{ -} - #endif static inline void @@ -4123,7 +4109,7 @@ static inline void finish_lock_switch(struct rq *rq) * prev into current: */ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); - balance_switch(rq); + __balance_callbacks(rq); raw_spin_unlock_irq(&rq->lock); } @@ -7334,6 +7320,10 @@ static void balance_push(struct rq *rq) lockdep_assert_held(&rq->lock); SCHED_WARN_ON(rq->cpu != smp_processor_id()); + /* + * Ensure the thing is persistent until balance_push_set(.on = false); + */ + rq->balance_callback = &balance_push_callback; /* * Both the cpu-hotplug and stop task are 
in this case and are @@ -7383,9 +7373,9 @@ static void balance_push_set(int cpu, bool on) rq_lock_irqsave(rq, &rf); if (on) - rq->balance_flags |= BALANCE_PUSH; + rq->balance_callback = &balance_push_callback; else - rq->balance_flags &= ~BALANCE_PUSH; + rq->balance_callback = NULL; rq_unlock_irqrestore(rq, &rf); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f885aadde900..8bcaf42b717a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -982,7 +982,6 @@ struct rq { unsigned long cpu_capacity_orig; struct callback_head *balance_callback; - unsigned char balance_flags; unsigned char nohz_idle_balance; unsigned char idle_balance; @@ -1235,6 +1234,8 @@ struct rq_flags { #endif }; +extern struct callback_head balance_push_callback; + /* * Lockdep annotation that avoids accidental unlocks; it's like a * sticky/continuous lockdep_assert_held(). @@ -1252,9 +1253,9 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) #ifdef CONFIG_SCHED_DEBUG rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); rf->clock_update_flags = 0; -#endif #ifdef CONFIG_SMP - SCHED_WARN_ON(rq->balance_callback); + SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback); +#endif #endif } @@ -1418,9 +1419,6 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p) extern int migrate_swap(struct task_struct *p, struct task_struct *t, int cpu, int scpu); -#define BALANCE_WORK 0x01 -#define BALANCE_PUSH 0x02 - static inline void queue_balance_callback(struct rq *rq, struct callback_head *head, @@ -1428,13 +1426,12 @@ queue_balance_callback(struct rq *rq, { lockdep_assert_held(&rq->lock); - if (unlikely(head->next || (rq->balance_flags & BALANCE_PUSH))) + if (unlikely(head->next || rq->balance_callback == &balance_push_callback)) return; head->func = (void (*)(struct callback_head *))func; head->next = rq->balance_callback; rq->balance_callback = head; - rq->balance_flags |= BALANCE_WORK; } #define 
rcu_dereference_check_sched_domain(p) \ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c01604224299..af41fb990820 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1784,39 +1784,112 @@ int remove_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(remove_memory); +static int try_offline_memory_block(struct memory_block *mem, void *arg) +{ + uint8_t online_type = MMOP_ONLINE_KERNEL; + uint8_t **online_types = arg; + struct page *page; + int rc; + + /* + * Sense the online_type via the zone of the memory block. Offlining + * with multiple zones within one memory block will be rejected + * by offlining code ... so we don't care about that. + */ + page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr)); + if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE) + online_type = MMOP_ONLINE_MOVABLE; + + rc = device_offline(&mem->dev); + /* + * Default is MMOP_OFFLINE - change it only if offlining succeeded, + * so try_reonline_memory_block() can do the right thing. + */ + if (!rc) + **online_types = online_type; + + (*online_types)++; + /* Ignore if already offline. */ + return rc < 0 ? rc : 0; +} + +static int try_reonline_memory_block(struct memory_block *mem, void *arg) +{ + uint8_t **online_types = arg; + int rc; + + if (**online_types != MMOP_OFFLINE) { + mem->online_type = **online_types; + rc = device_online(&mem->dev); + if (rc < 0) + pr_warn("%s: Failed to re-online memory: %d", + __func__, rc); + } + + /* Continue processing all remaining memory blocks. */ + (*online_types)++; + return 0; +} + /* - * Try to offline and remove a memory block. Might take a long time to - * finish in case memory is still in use. Primarily useful for memory devices - * that logically unplugged all memory (so it's no longer in use) and want to - * offline + remove the memory block. + * Try to offline and remove memory. Might take a long time to finish in case + * memory is still in use. 
Primarily useful for memory devices that logically + * unplugged all memory (so it's no longer in use) and want to offline + remove + * that memory. */ int offline_and_remove_memory(int nid, u64 start, u64 size) { - struct memory_block *mem; - int rc = -EINVAL; + const unsigned long mb_count = size / memory_block_size_bytes(); + uint8_t *online_types, *tmp; + int rc; if (!IS_ALIGNED(start, memory_block_size_bytes()) || - size != memory_block_size_bytes()) - return rc; - - lock_device_hotplug(); - mem = find_memory_block(__pfn_to_section(PFN_DOWN(start))); - if (mem) - rc = device_offline(&mem->dev); - /* Ignore if the device is already offline. */ - if (rc > 0) - rc = 0; + !IS_ALIGNED(size, memory_block_size_bytes()) || !size) + return -EINVAL; /* - * In case we succeeded to offline the memory block, remove it. + * We'll remember the old online type of each memory block, so we can + * try to revert whatever we did when offlining one memory block fails + * after offlining some others succeeded. + */ + online_types = kmalloc_array(mb_count, sizeof(*online_types), + GFP_KERNEL); + if (!online_types) + return -ENOMEM; + /* + * Initialize all states to MMOP_OFFLINE, so when we abort processing in + * try_offline_memory_block(), we'll skip all unprocessed blocks in + * try_reonline_memory_block(). + */ + memset(online_types, MMOP_OFFLINE, mb_count); + + lock_device_hotplug(); + + tmp = online_types; + rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block); + + /* + * In case we succeeded to offline all memory, remove it. * This cannot fail as it cannot get onlined in the meantime. */ if (!rc) { rc = try_remove_memory(nid, start, size); - WARN_ON_ONCE(rc); + if (rc) + pr_err("%s: Failed to remove memory: %d", __func__, rc); + } + + /* + * Rollback what we did. While memory onlining might theoretically fail + * (nacked by a notifier), it barely ever happens. 
+ */ + if (rc) { + tmp = online_types; + walk_memory_blocks(start, size, &tmp, + try_reonline_memory_block); } unlock_device_hotplug(); + kfree(online_types); return rc; } EXPORT_SYMBOL_GPL(offline_and_remove_memory); diff --git a/scripts/coccicheck b/scripts/coccicheck index 209bb0427b43..65fee63aeadb 100755 --- a/scripts/coccicheck +++ b/scripts/coccicheck @@ -16,7 +16,6 @@ if [ ! -x "$SPATCH" ]; then fi SPATCH_VERSION=$($SPATCH --version | head -1 | awk '{print $3}') -SPATCH_VERSION_NUM=$(echo $SPATCH_VERSION | ${DIR}/scripts/ld-version.sh) USE_JOBS="no" $SPATCH --help | grep "\-\-jobs" > /dev/null && USE_JOBS="yes" @@ -61,6 +60,18 @@ COCCIINCLUDE=${COCCIINCLUDE// -include/ --include} if [ "$C" = "1" -o "$C" = "2" ]; then ONLINE=1 + if [[ $# -le 0 ]]; then + echo '' + echo 'Specifying both the variable "C" and rule "coccicheck" in the make +command results in a shift count error.' + echo '' + echo 'Try specifying "scripts/coccicheck" as a value for the CHECK variable instead.' + echo '' + echo 'Example: make C=2 CHECK=scripts/coccicheck drivers/net/ethernet/ethoc.o' + echo '' + exit 1 + fi + # Take only the last argument, which is the C file to test shift $(( $# - 1 )) OPTIONS="$COCCIINCLUDE $1" @@ -186,14 +197,11 @@ coccinelle () { OPT=`grep "Options:" $COCCI | cut -d':' -f2` REQ=`grep "Requires:" $COCCI | cut -d':' -f2 | sed "s| ||"` - REQ_NUM=$(echo $REQ | ${DIR}/scripts/ld-version.sh) - if [ "$REQ_NUM" != "0" ] ; then - if [ "$SPATCH_VERSION_NUM" -lt "$REQ_NUM" ] ; then - echo "Skipping coccinelle SmPL patch: $COCCI" - echo "You have coccinelle: $SPATCH_VERSION" - echo "This SmPL patch requires: $REQ" - return - fi + if [ -n "$REQ" ] && ! { echo "$REQ"; echo "$SPATCH_VERSION"; } | sort -CV ; then + echo "Skipping coccinelle SmPL patch: $COCCI" + echo "You have coccinelle: $SPATCH_VERSION" + echo "This SmPL patch requires: $REQ" + return fi # The option '--parse-cocci' can be used to syntactically check the SmPL files. 
diff --git a/scripts/coccinelle/api/ptr_ret.cocci b/scripts/coccinelle/api/ptr_ret.cocci deleted file mode 100644 index e76cd5d90a8a..000000000000 --- a/scripts/coccinelle/api/ptr_ret.cocci +++ /dev/null @@ -1,97 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/// -/// Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR -/// -// Confidence: High -// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6. -// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6. -// URL: http://coccinelle.lip6.fr/ -// Options: --no-includes --include-headers -// -// Keywords: ERR_PTR, PTR_ERR, PTR_ERR_OR_ZERO -// Version min: 2.6.39 -// - -virtual context -virtual patch -virtual org -virtual report - -@depends on patch@ -expression ptr; -@@ - -- if (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0; -+ return PTR_ERR_OR_ZERO(ptr); - -@depends on patch@ -expression ptr; -@@ - -- if (IS_ERR(ptr)) return PTR_ERR(ptr); return 0; -+ return PTR_ERR_OR_ZERO(ptr); - -@depends on patch@ -expression ptr; -@@ - -- (IS_ERR(ptr) ? PTR_ERR(ptr) : 0) -+ PTR_ERR_OR_ZERO(ptr) - -@r1 depends on !patch@ -expression ptr; -position p1; -@@ - -* if@p1 (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0; - -@r2 depends on !patch@ -expression ptr; -position p2; -@@ - -* if@p2 (IS_ERR(ptr)) return PTR_ERR(ptr); return 0; - -@r3 depends on !patch@ -expression ptr; -position p3; -@@ - -* IS_ERR@p3(ptr) ? 
PTR_ERR(ptr) : 0 - -@script:python depends on org@ -p << r1.p1; -@@ - -coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - - -@script:python depends on org@ -p << r2.p2; -@@ - -coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - -@script:python depends on org@ -p << r3.p3; -@@ - -coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - -@script:python depends on report@ -p << r1.p1; -@@ - -coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - -@script:python depends on report@ -p << r2.p2; -@@ - -coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") - -@script:python depends on report@ -p << r3.p3; -@@ - -coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used") diff --git a/scripts/coccinelle/misc/boolinit.cocci b/scripts/coccinelle/misc/boolinit.cocci deleted file mode 100644 index fed6126e2b9d..000000000000 --- a/scripts/coccinelle/misc/boolinit.cocci +++ /dev/null @@ -1,195 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/// Bool initializations should use true and false. Bool tests don't need -/// comparisons. Based on contributions from Joe Perches, Rusty Russell -/// and Bruce W Allan. -/// -// Confidence: High -// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6. -// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6. 
-// URL: http://coccinelle.lip6.fr/ -// Options: --include-headers - -virtual patch -virtual context -virtual org -virtual report - -@boolok@ -symbol true,false; -@@ -( -true -| -false -) - -@depends on patch@ -bool t; -@@ - -( -- t == true -+ t -| -- true == t -+ t -| -- t != true -+ !t -| -- true != t -+ !t -| -- t == false -+ !t -| -- false == t -+ !t -| -- t != false -+ t -| -- false != t -+ t -) - -@depends on patch disable is_zero, isnt_zero@ -bool t; -@@ - -( -- t == 1 -+ t -| -- t != 1 -+ !t -| -- t == 0 -+ !t -| -- t != 0 -+ t -) - -@depends on patch && boolok@ -bool b; -@@ -( - b = -- 0 -+ false -| - b = -- 1 -+ true -) - -// --------------------------------------------------------------------- - -@r1 depends on !patch@ -bool t; -position p; -@@ - -( -* t@p == true -| -* true == t@p -| -* t@p != true -| -* true != t@p -| -* t@p == false -| -* false == t@p -| -* t@p != false -| -* false != t@p -) - -@r2 depends on !patch disable is_zero, isnt_zero@ -bool t; -position p; -@@ - -( -* t@p == 1 -| -* t@p != 1 -| -* t@p == 0 -| -* t@p != 0 -) - -@r3 depends on !patch && boolok@ -bool b; -position p1; -@@ -( -*b@p1 = 0 -| -*b@p1 = 1 -) - -@r4 depends on !patch@ -bool b; -position p2; -identifier i; -constant c != {0,1}; -@@ -( - b = i -| -*b@p2 = c -) - -@script:python depends on org@ -p << r1.p; -@@ - -cocci.print_main("WARNING: Comparison to bool",p) - -@script:python depends on org@ -p << r2.p; -@@ - -cocci.print_main("WARNING: Comparison of 0/1 to bool variable",p) - -@script:python depends on org@ -p1 << r3.p1; -@@ - -cocci.print_main("WARNING: Assignment of 0/1 to bool variable",p1) - -@script:python depends on org@ -p2 << r4.p2; -@@ - -cocci.print_main("ERROR: Assignment of non-0/1 constant to bool variable",p2) - -@script:python depends on report@ -p << r1.p; -@@ - -coccilib.report.print_report(p[0],"WARNING: Comparison to bool") - -@script:python depends on report@ -p << r2.p; -@@ - -coccilib.report.print_report(p[0],"WARNING: Comparison of 0/1 to bool 
variable") - -@script:python depends on report@ -p1 << r3.p1; -@@ - -coccilib.report.print_report(p1[0],"WARNING: Assignment of 0/1 to bool variable") - -@script:python depends on report@ -p2 << r4.p2; -@@ - -coccilib.report.print_report(p2[0],"ERROR: Assignment of non-0/1 constant to bool variable") diff --git a/scripts/nsdeps b/scripts/nsdeps index dab4c1a0e27d..e8ce2a4d704a 100644 --- a/scripts/nsdeps +++ b/scripts/nsdeps @@ -12,11 +12,9 @@ if [ ! -x "$SPATCH" ]; then exit 1 fi -SPATCH_REQ_VERSION_NUM=$(echo $SPATCH_REQ_VERSION | ${DIR}/scripts/ld-version.sh) SPATCH_VERSION=$($SPATCH --version | head -1 | awk '{print $3}') -SPATCH_VERSION_NUM=$(echo $SPATCH_VERSION | ${DIR}/scripts/ld-version.sh) -if [ "$SPATCH_VERSION_NUM" -lt "$SPATCH_REQ_VERSION_NUM" ] ; then +if ! { echo "$SPATCH_REQ_VERSION"; echo "$SPATCH_VERSION"; } | sort -CV ; then echo "spatch needs to be version $SPATCH_REQ_VERSION or higher" exit 1 fi diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile index 67dabca670e2..2499f2485c04 100644 --- a/security/integrity/ima/Makefile +++ b/security/integrity/ima/Makefile @@ -14,3 +14,7 @@ ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o ima-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o ima-$(CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS) += ima_asymmetric_keys.o ima-$(CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS) += ima_queue_keys.o + +ifeq ($(CONFIG_EFI),y) +ima-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_efi.o +endif diff --git a/arch/x86/kernel/ima_arch.c b/security/integrity/ima/ima_efi.c similarity index 60% rename from arch/x86/kernel/ima_arch.c rename to security/integrity/ima/ima_efi.c index 7dfb1e808928..71786d01946f 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/security/integrity/ima/ima_efi.c @@ -5,50 +5,29 @@ #include #include #include +#include -extern struct boot_params boot_params; +#ifndef arch_ima_efi_boot_mode +#define arch_ima_efi_boot_mode efi_secureboot_mode_unset +#endif static enum efi_secureboot_mode get_sb_mode(void) { - 
efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; - efi_status_t status; - unsigned long size; - u8 secboot, setupmode; - - size = sizeof(secboot); + enum efi_secureboot_mode mode; if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) { pr_info("ima: secureboot mode unknown, no efi\n"); return efi_secureboot_mode_unknown; } - /* Get variable contents into buffer */ - status = efi.get_variable(L"SecureBoot", &efi_variable_guid, - NULL, &size, &secboot); - if (status == EFI_NOT_FOUND) { + mode = efi_get_secureboot_mode(efi.get_variable); + if (mode == efi_secureboot_mode_disabled) pr_info("ima: secureboot mode disabled\n"); - return efi_secureboot_mode_disabled; - } - - if (status != EFI_SUCCESS) { + else if (mode == efi_secureboot_mode_unknown) pr_info("ima: secureboot mode unknown\n"); - return efi_secureboot_mode_unknown; - } - - size = sizeof(setupmode); - status = efi.get_variable(L"SetupMode", &efi_variable_guid, - NULL, &size, &setupmode); - - if (status != EFI_SUCCESS) /* ignore unknown SetupMode */ - setupmode = 0; - - if (secboot == 0 || setupmode == 1) { - pr_info("ima: secureboot mode disabled\n"); - return efi_secureboot_mode_disabled; - } - - pr_info("ima: secureboot mode enabled\n"); - return efi_secureboot_mode_enabled; + else + pr_info("ima: secureboot mode enabled\n"); + return mode; } bool arch_ima_get_secureboot(void) @@ -57,7 +36,7 @@ bool arch_ima_get_secureboot(void) static bool initialized; if (!initialized && efi_enabled(EFI_BOOT)) { - sb_mode = boot_params.secure_boot; + sb_mode = arch_ima_efi_boot_mode; if (sb_mode == efi_secureboot_mode_unset) sb_mode = get_sb_mode(); diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index efe2406a3960..7eabb448acab 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -688,9 +688,10 @@ bool smack_privileged_cred(int cap, const struct cred *cred) bool smack_privileged(int cap) { /* - * All kernel tasks are privileged + * Kernel threads 
may not have credentials we can use. + * The io_uring kernel threads do have reliable credentials. */ - if (unlikely(current->flags & PF_KTHREAD)) + if ((current->flags & (PF_KTHREAD | PF_IO_WORKER)) == PF_KTHREAD) return true; return smack_privileged_cred(cap, current_cred()); diff --git a/sound/core/init.c b/sound/core/init.c index 764dbe673d48..75aec71c48a8 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -149,8 +149,6 @@ static void release_card_device(struct device *dev) * @extra_size: allocate this extra size after the main soundcard structure * @card_ret: the pointer to store the created card instance * - * Creates and initializes a soundcard structure. - * * The function allocates snd_card instance via kzalloc with the given * space for the driver to use freely. The allocated struct is stored * in the given card_ret pointer. diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 0aeeb6244ff6..966bef5acc75 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -77,7 +77,8 @@ static void snd_malloc_dev_iram(struct snd_dma_buffer *dmab, size_t size) /* Assign the pool into private_data field */ dmab->private_data = pool; - dmab->area = gen_pool_dma_alloc(pool, size, &dmab->addr); + dmab->area = gen_pool_dma_alloc_align(pool, size, &dmab->addr, + PAGE_SIZE); } /** @@ -132,6 +133,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, if (WARN_ON(!dmab)) return -ENXIO; + size = PAGE_ALIGN(size); dmab->dev.type = type; dmab->dev.dev = device; dmab->bytes = 0; diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index de1917484647..142fc751a847 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -693,6 +693,8 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, oss_buffer_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL)) * oss_frame_size; + if (!oss_buffer_size) + return -EINVAL; 
oss_buffer_size = rounddown_pow_of_two(oss_buffer_size); if (atomic_read(&substream->mmap_count)) { if (oss_buffer_size > runtime->oss.mmap_bytes) @@ -728,17 +730,21 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, min_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - min_period_size *= oss_frame_size; - min_period_size = roundup_pow_of_two(min_period_size); - if (oss_period_size < min_period_size) - oss_period_size = min_period_size; + if (min_period_size) { + min_period_size *= oss_frame_size; + min_period_size = roundup_pow_of_two(min_period_size); + if (oss_period_size < min_period_size) + oss_period_size = min_period_size; + } max_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - max_period_size *= oss_frame_size; - max_period_size = rounddown_pow_of_two(max_period_size); - if (oss_period_size > max_period_size) - oss_period_size = max_period_size; + if (max_period_size) { + max_period_size *= oss_frame_size; + max_period_size = rounddown_pow_of_two(max_period_size); + if (oss_period_size > max_period_size) + oss_period_size = max_period_size; + } oss_periods = oss_buffer_size / oss_period_size; diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index 4f03ba8ed0ae..ee6e9c5eec45 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -89,14 +89,6 @@ static int preallocate_pcm_pages(struct snd_pcm_substream *substream, size_t siz return 0; } -/* - * release the preallocated buffer if not yet done. - */ -static void snd_pcm_lib_preallocate_dma_free(struct snd_pcm_substream *substream) -{ - do_free_pages(substream->pcm->card, &substream->dma_buffer); -} - /** * snd_pcm_lib_preallocate_free - release the preallocated buffer of the specified substream. 
* @substream: the pcm substream instance @@ -105,7 +97,7 @@ static void snd_pcm_lib_preallocate_dma_free(struct snd_pcm_substream *substream */ void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream) { - snd_pcm_lib_preallocate_dma_free(substream); + do_free_pages(substream->pcm->card, &substream->dma_buffer); } /** diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 47b155a49226..9f3f8e953ff0 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -755,8 +755,13 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, runtime->boundary *= 2; /* clear the buffer for avoiding possible kernel info leaks */ - if (runtime->dma_area && !substream->ops->copy_user) - memset(runtime->dma_area, 0, runtime->dma_bytes); + if (runtime->dma_area && !substream->ops->copy_user) { + size_t size = runtime->dma_bytes; + + if (runtime->info & SNDRV_PCM_INFO_MMAP) + size = PAGE_ALIGN(size); + memset(runtime->dma_area, 0, size); + } snd_pcm_timer_resolution_change(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 41cc64036f22..dde5ba209541 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2516,6 +2516,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1275, "MSI-GL63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), @@ -6368,6 +6369,7 @@ enum { ALC287_FIXUP_HP_GPIO_LED, ALC256_FIXUP_HP_HEADSET_MIC, ALC236_FIXUP_DELL_AIO_HEADSET_MIC, + 
ALC282_FIXUP_ACER_DISABLE_LINEOUT, }; static const struct hda_fixup alc269_fixups[] = { @@ -7791,6 +7793,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC282_FIXUP_ACER_DISABLE_LINEOUT] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x411111f0 }, + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { }, + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7805,11 +7817,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), + SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x1166, "Acer Veriton N4640G", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x1025, 0x1167, "Acer Veriton N6640G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), @@ -7870,6 +7885,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", 
ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0a58, "Dell Precision 3650 Tower", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -8017,6 +8033,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1401, "Clevo L140[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -8564,6 +8581,22 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60140}, {0x19, 0x04a11030}, {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC282_STANDARD_PINS, + {0x12, 0x90a609c0}, + {0x18, 0x03a11830}, + {0x19, 0x04a19831}, + {0x1a, 0x0481303f}, + {0x1b, 0x04211020}, + {0x21, 0x0321101f}), + SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC282_STANDARD_PINS, + {0x12, 0x90a60940}, + {0x18, 0x03a11830}, + {0x19, 0x04a19831}, + {0x1a, 0x0481303f}, + {0x1b, 0x04211020}, + {0x21, 0x0321101f}), SND_HDA_PIN_QUIRK(0x10ec0283, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, ALC282_STANDARD_PINS, {0x12, 0x90a60130}, diff --git a/sound/usb/card.c b/sound/usb/card.c index cb0b6582dfee..d731ca62d599 100644 
--- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -383,6 +383,9 @@ static const struct usb_audio_device_name usb_audio_names[] = { /* ASUS ROG Strix */ PROFILE_NAME(0x0b05, 0x1917, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), + /* ASUS PRIME TRX40 PRO-S */ + PROFILE_NAME(0x0b05, 0x1918, + "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Dell WD15 Dock */ PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"), diff --git a/sound/usb/clock.c b/sound/usb/clock.c index e940dcee792b..31051f2be46d 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -534,6 +534,12 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip, } crate = data[0] | (data[1] << 8) | (data[2] << 16); + if (!crate) { + dev_info(&dev->dev, "failed to read current rate; disabling the check\n"); + chip->sample_rate_read_error = 3; /* three strikes, see above */ + return 0; + } + if (crate != rate) { dev_warn(&dev->dev, "current rate %d is different from the runtime rate %d\n", crate, rate); // runtime->rate = crate; diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 4e911d200562..eb3a4c433c3e 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -75,6 +75,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { /* No quirk for playback but with capture quirk (see below) */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0130), /* BOSS BR-80 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0189), /* BOSS GT-100v2 */ + IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d6), /* BOSS GT-1 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d8), /* BOSS Katana */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01e5), /* BOSS GT-001 */ @@ -85,6 +86,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x0582, 0x0130, 0x0d, 0x01), /* BOSS BR-80 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x0189, 0x0d, 0x01), /* BOSS GT-100v2 */ + IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d6, 0x0d, 
0x01), /* BOSS GT-1 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d8, 0x0d, 0x01), /* BOSS Katana */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01e5, 0x0d, 0x01), /* BOSS GT-001 */ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 63cdf3c8c2bc..e4a690bb4c99 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1771,6 +1771,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x25ce: /* Mytek devices */ case 0x278b: /* Rotel? */ case 0x292b: /* Gustard/Ess based devices */ + case 0x2972: /* FiiO devices */ case 0x2ab6: /* T+A devices */ case 0x3353: /* Khadas devices */ case 0x3842: /* EVGA */ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 1c17c3a24411..24223adae150 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -156,9 +156,6 @@ struct kvm_sync_regs { __u64 device_irq_level; }; -struct kvm_arch_memory_slot { -}; - /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. 
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index f5ef2d5b9231..84b887825f12 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -237,6 +237,7 @@ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ #define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -376,6 +377,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 2b5fc9accec4..546d6ecf0a35 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -472,6 +472,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba8..8e76d3701db3 100644 
--- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -12,6 +12,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define DE_VECTOR 0 #define DB_VECTOR 1 diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index f1d8307454e0..554f75fe013c 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -77,10 +77,28 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_EFER_WRITE_TRAP 0x08f +#define SVM_EXIT_CR0_WRITE_TRAP 0x090 +#define SVM_EXIT_CR1_WRITE_TRAP 0x091 +#define SVM_EXIT_CR2_WRITE_TRAP 0x092 +#define SVM_EXIT_CR3_WRITE_TRAP 0x093 +#define SVM_EXIT_CR4_WRITE_TRAP 0x094 +#define SVM_EXIT_CR5_WRITE_TRAP 0x095 +#define SVM_EXIT_CR6_WRITE_TRAP 0x096 +#define SVM_EXIT_CR7_WRITE_TRAP 0x097 +#define SVM_EXIT_CR8_WRITE_TRAP 0x098 +#define SVM_EXIT_CR9_WRITE_TRAP 0x099 +#define SVM_EXIT_CR10_WRITE_TRAP 0x09a +#define SVM_EXIT_CR11_WRITE_TRAP 0x09b +#define SVM_EXIT_CR12_WRITE_TRAP 0x09c +#define SVM_EXIT_CR13_WRITE_TRAP 0x09d +#define SVM_EXIT_CR14_WRITE_TRAP 0x09e +#define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +#define SVM_EXIT_VMGEXIT 0x403 /* SEV-ES software-defined VMGEXIT events */ #define SVM_VMGEXIT_MMIO_READ 0x80000001 @@ -183,10 +201,20 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { 
SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d5..ada955c5ebb6 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -32,6 +32,7 @@ #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 +#define EXIT_REASON_SIPI_SIGNAL 4 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -94,6 +95,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ + { EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index fc48c64700eb..728752917785 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_epoll_pwait2 441 +__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #undef __NR_syscalls -#define __NR_syscalls 441 +#define __NR_syscalls 442 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 
ca41220b40b8..886802b8ffba 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -250,6 +250,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_ARM_NISV 28 #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -1053,6 +1054,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 #ifdef KVM_CAP_IRQ_ROUTING @@ -1511,7 +1514,7 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) -/* Available with KVM_CAP_HYPERV_CPUID */ +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) /* Available with KVM_CAP_ARM_SVE */ @@ -1557,6 +1560,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_X86_MSR_FILTER */ #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1710,4 +1716,52 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. 
+ */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY BIT(0) +#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. 
+ */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + #endif /* __LINUX_KVM_H */ diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index e58d00d62f02..840ea0e59287 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -14,7 +14,6 @@ PERF_HAVE_JITDUMP := 1 out := $(OUTPUT)arch/powerpc/include/generated/asm header32 := $(out)/syscalls_32.c header64 := $(out)/syscalls_64.c -syskrn := $(srctree)/arch/powerpc/kernel/syscalls/syscall.tbl sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls sysdef := $(sysprf)/syscall.tbl systbl := $(sysprf)/mksyscalltbl @@ -23,15 +22,9 @@ systbl := $(sysprf)/mksyscalltbl _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header64): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' '64' $(sysdef) > $@ $(header32): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' '32' $(sysdef) > $@ clean:: diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index b168364ac050..f744eb5cba88 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -9,7 +9,9 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 nospu fork ppc_fork +2 32 fork ppc_fork sys_fork +2 64 fork sys_fork +2 spu fork sys_ni_syscall 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -158,7 +160,9 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn 
sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 nospu clone ppc_clone +120 32 clone ppc_clone sys_clone +120 64 clone sys_clone +120 spu clone sys_ni_syscall 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -240,7 +244,9 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 nospu vfork ppc_vfork +189 32 vfork ppc_vfork sys_vfork +189 64 vfork sys_vfork +189 spu vfork sys_ni_syscall 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -316,8 +322,8 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext ppc32_swapcontext -249 64 swapcontext ppc64_swapcontext +249 32 swapcontext ppc_swapcontext compat_sys_swapcontext +249 64 swapcontext sys_swapcontext 249 spu swapcontext sys_ni_syscall 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 @@ -456,7 +462,7 @@ 361 common bpf sys_bpf 362 nospu execveat sys_execveat compat_sys_execveat 363 32 switch_endian sys_ni_syscall -363 64 switch_endian ppc_switch_endian +363 64 switch_endian sys_switch_endian 363 spu switch_endian sys_ni_syscall 364 common userfaultfd sys_userfaultfd 365 common membarrier sys_membarrier @@ -516,6 +522,12 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 nospu clone3 ppc_clone3 +435 32 clone3 ppc_clone3 sys_clone3 +435 64 clone3 sys_clone3 +435 spu clone3 sys_ni_syscall +436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/tools/perf/arch/s390/Makefile 
b/tools/perf/arch/s390/Makefile index 6ac8887be7c9..74bffbea03e2 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -12,7 +12,6 @@ PERF_HAVE_JITDUMP := 1 out := $(OUTPUT)arch/s390/include/generated/asm header := $(out)/syscalls_64.c -syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls sysdef := $(sysprf)/syscall.tbl systbl := $(sysprf)/mksyscalltbl @@ -21,9 +20,6 @@ systbl := $(sysprf)/mksyscalltbl _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sysdef) $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B $(sysdef) $(syskrn) >/dev/null) \ - || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ clean:: diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index d2fa9647ce25..d443423495e5 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -15,86 +15,86 @@ 5 common open sys_open compat_sys_open 6 common close sys_close sys_close 7 common restart_syscall sys_restart_syscall sys_restart_syscall -8 common creat sys_creat compat_sys_creat -9 common link sys_link compat_sys_link -10 common unlink sys_unlink compat_sys_unlink +8 common creat sys_creat sys_creat +9 common link sys_link sys_link +10 common unlink sys_unlink sys_unlink 11 common execve sys_execve compat_sys_execve -12 common chdir sys_chdir compat_sys_chdir -13 32 time - compat_sys_time -14 common mknod sys_mknod compat_sys_mknod -15 common chmod sys_chmod compat_sys_chmod -16 32 lchown - compat_sys_s390_lchown16 +12 common chdir sys_chdir sys_chdir +13 32 time - sys_time32 +14 common mknod sys_mknod sys_mknod +15 common chmod sys_chmod sys_chmod +16 32 lchown - sys_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid 
sys_getpid -21 common mount sys_mount -22 common umount sys_oldumount compat_sys_oldumount -23 32 setuid - compat_sys_s390_setuid16 -24 32 getuid - compat_sys_s390_getuid16 -25 32 stime - compat_sys_stime +21 common mount sys_mount sys_mount +22 common umount sys_oldumount sys_oldumount +23 32 setuid - sys_setuid16 +24 32 getuid - sys_getuid16 +25 32 stime - sys_stime32 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm sys_alarm 29 common pause sys_pause sys_pause -30 common utime sys_utime compat_sys_utime -33 common access sys_access compat_sys_access +30 common utime sys_utime sys_utime32 +33 common access sys_access sys_access 34 common nice sys_nice sys_nice 36 common sync sys_sync sys_sync 37 common kill sys_kill sys_kill -38 common rename sys_rename compat_sys_rename -39 common mkdir sys_mkdir compat_sys_mkdir -40 common rmdir sys_rmdir compat_sys_rmdir +38 common rename sys_rename sys_rename +39 common mkdir sys_mkdir sys_mkdir +40 common rmdir sys_rmdir sys_rmdir 41 common dup sys_dup sys_dup -42 common pipe sys_pipe compat_sys_pipe +42 common pipe sys_pipe sys_pipe 43 common times sys_times compat_sys_times -45 common brk sys_brk compat_sys_brk -46 32 setgid - compat_sys_s390_setgid16 -47 32 getgid - compat_sys_s390_getgid16 -48 common signal sys_signal compat_sys_signal -49 32 geteuid - compat_sys_s390_geteuid16 -50 32 getegid - compat_sys_s390_getegid16 -51 common acct sys_acct compat_sys_acct -52 common umount2 sys_umount compat_sys_umount +45 common brk sys_brk sys_brk +46 32 setgid - sys_setgid16 +47 32 getgid - sys_getgid16 +48 common signal sys_signal sys_signal +49 32 geteuid - sys_geteuid16 +50 32 getegid - sys_getegid16 +51 common acct sys_acct sys_acct +52 common umount2 sys_umount sys_umount 54 common ioctl sys_ioctl compat_sys_ioctl 55 common fcntl sys_fcntl compat_sys_fcntl 57 common setpgid sys_setpgid sys_setpgid 60 common umask sys_umask sys_umask -61 common chroot sys_chroot compat_sys_chroot +61 common chroot 
sys_chroot sys_chroot 62 common ustat sys_ustat compat_sys_ustat 63 common dup2 sys_dup2 sys_dup2 64 common getppid sys_getppid sys_getppid 65 common getpgrp sys_getpgrp sys_getpgrp 66 common setsid sys_setsid sys_setsid 67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - compat_sys_s390_setreuid16 -71 32 setregid - compat_sys_s390_setregid16 -72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend +70 32 setreuid - sys_setreuid16 +71 32 setregid - sys_setregid16 +72 common sigsuspend sys_sigsuspend sys_sigsuspend 73 common sigpending sys_sigpending compat_sys_sigpending -74 common sethostname sys_sethostname compat_sys_sethostname +74 common sethostname sys_sethostname sys_sethostname 75 common setrlimit sys_setrlimit compat_sys_setrlimit 76 32 getrlimit - compat_sys_old_getrlimit 77 common getrusage sys_getrusage compat_sys_getrusage 78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday 79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - compat_sys_s390_getgroups16 -81 32 setgroups - compat_sys_s390_setgroups16 -83 common symlink sys_symlink compat_sys_symlink -85 common readlink sys_readlink compat_sys_readlink -86 common uselib sys_uselib compat_sys_uselib -87 common swapon sys_swapon compat_sys_swapon -88 common reboot sys_reboot compat_sys_reboot +80 32 getgroups - sys_getgroups16 +81 32 setgroups - sys_setgroups16 +83 common symlink sys_symlink sys_symlink +85 common readlink sys_readlink sys_readlink +86 common uselib sys_uselib sys_uselib +87 common swapon sys_swapon sys_swapon +88 common reboot sys_reboot sys_reboot 89 common readdir - compat_sys_old_readdir 90 common mmap sys_old_mmap compat_sys_s390_old_mmap -91 common munmap sys_munmap compat_sys_munmap +91 common munmap sys_munmap sys_munmap 92 common truncate sys_truncate compat_sys_truncate 93 common ftruncate sys_ftruncate compat_sys_ftruncate 94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - compat_sys_s390_fchown16 +95 32 fchown 
- sys_fchown16 96 common getpriority sys_getpriority sys_getpriority 97 common setpriority sys_setpriority sys_setpriority 99 common statfs sys_statfs compat_sys_statfs 100 common fstatfs sys_fstatfs compat_sys_fstatfs 101 32 ioperm - - 102 common socketcall sys_socketcall compat_sys_socketcall -103 common syslog sys_syslog compat_sys_syslog +103 common syslog sys_syslog sys_syslog 104 common setitimer sys_setitimer compat_sys_setitimer 105 common getitimer sys_getitimer compat_sys_getitimer 106 common stat sys_newstat compat_sys_newstat @@ -104,76 +104,76 @@ 111 common vhangup sys_vhangup sys_vhangup 112 common idle - - 114 common wait4 sys_wait4 compat_sys_wait4 -115 common swapoff sys_swapoff compat_sys_swapoff +115 common swapoff sys_swapoff sys_swapoff 116 common sysinfo sys_sysinfo compat_sys_sysinfo 117 common ipc sys_s390_ipc compat_sys_s390_ipc 118 common fsync sys_fsync sys_fsync 119 common sigreturn sys_sigreturn compat_sys_sigreturn -120 common clone sys_clone compat_sys_clone -121 common setdomainname sys_setdomainname compat_sys_setdomainname -122 common uname sys_newuname compat_sys_newuname -124 common adjtimex sys_adjtimex compat_sys_adjtimex -125 common mprotect sys_mprotect compat_sys_mprotect +120 common clone sys_clone sys_clone +121 common setdomainname sys_setdomainname sys_setdomainname +122 common uname sys_newuname sys_newuname +124 common adjtimex sys_adjtimex sys_adjtimex_time32 +125 common mprotect sys_mprotect sys_mprotect 126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask 127 common create_module - - -128 common init_module sys_init_module compat_sys_init_module -129 common delete_module sys_delete_module compat_sys_delete_module +128 common init_module sys_init_module sys_init_module +129 common delete_module sys_delete_module sys_delete_module 130 common get_kernel_syms - - -131 common quotactl sys_quotactl compat_sys_quotactl +131 common quotactl sys_quotactl sys_quotactl 132 common getpgid sys_getpgid sys_getpgid 133 
common fchdir sys_fchdir sys_fchdir -134 common bdflush sys_bdflush compat_sys_bdflush -135 common sysfs sys_sysfs compat_sys_sysfs +134 common bdflush sys_bdflush sys_bdflush +135 common sysfs sys_sysfs sys_sysfs 136 common personality sys_s390_personality sys_s390_personality 137 common afs_syscall - - -138 32 setfsuid - compat_sys_s390_setfsuid16 -139 32 setfsgid - compat_sys_s390_setfsgid16 -140 32 _llseek - compat_sys_llseek +138 32 setfsuid - sys_setfsuid16 +139 32 setfsgid - sys_setfsgid16 +140 32 _llseek - sys_llseek 141 common getdents sys_getdents compat_sys_getdents 142 32 _newselect - compat_sys_select 142 64 select sys_select - 143 common flock sys_flock sys_flock -144 common msync sys_msync compat_sys_msync -145 common readv sys_readv -146 common writev sys_writev +144 common msync sys_msync sys_msync +145 common readv sys_readv sys_readv +146 common writev sys_writev sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync 149 common _sysctl - - -150 common mlock sys_mlock compat_sys_mlock -151 common munlock sys_munlock compat_sys_munlock +150 common mlock sys_mlock sys_mlock +151 common munlock sys_munlock sys_munlock 152 common mlockall sys_mlockall sys_mlockall 153 common munlockall sys_munlockall sys_munlockall -154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam -155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam -156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler +154 common sched_setparam sys_sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler 157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler 158 common sched_yield sys_sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min 
sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep -163 common mremap sys_mremap compat_sys_mremap -164 32 setresuid - compat_sys_s390_setresuid16 -165 32 getresuid - compat_sys_s390_getresuid16 +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 +163 common mremap sys_mremap sys_mremap +164 32 setresuid - sys_setresuid16 +165 32 getresuid - sys_getresuid16 167 common query_module - - -168 common poll sys_poll compat_sys_poll +168 common poll sys_poll sys_poll 169 common nfsservctl - - -170 32 setresgid - compat_sys_s390_setresgid16 -171 32 getresgid - compat_sys_s390_getresgid16 -172 common prctl sys_prctl compat_sys_prctl +170 32 setresgid - sys_setresgid16 +171 32 getresgid - sys_getresgid16 +172 common prctl sys_prctl sys_prctl 173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common pread64 sys_pread64 compat_sys_s390_pread64 181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - compat_sys_s390_chown16 -183 common getcwd sys_getcwd compat_sys_getcwd -184 common capget sys_capget compat_sys_capget -185 common capset sys_capset compat_sys_capset +182 32 chown - sys_chown16 +183 common getcwd sys_getcwd sys_getcwd +184 common capget sys_capget sys_capget +185 common capset sys_capset 
sys_capset 186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack 187 common sendfile sys_sendfile64 compat_sys_sendfile 188 common getpmsg - - @@ -187,7 +187,7 @@ 195 32 stat64 - compat_sys_s390_stat64 196 32 lstat64 - compat_sys_s390_lstat64 197 32 fstat64 - compat_sys_s390_fstat64 -198 32 lchown32 - compat_sys_lchown +198 32 lchown32 - sys_lchown 198 64 lchown sys_lchown - 199 32 getuid32 - sys_getuid 199 64 getuid sys_getuid - @@ -201,21 +201,21 @@ 203 64 setreuid sys_setreuid - 204 32 setregid32 - sys_setregid 204 64 setregid sys_setregid - -205 32 getgroups32 - compat_sys_getgroups +205 32 getgroups32 - sys_getgroups 205 64 getgroups sys_getgroups - -206 32 setgroups32 - compat_sys_setgroups +206 32 setgroups32 - sys_setgroups 206 64 setgroups sys_setgroups - 207 32 fchown32 - sys_fchown 207 64 fchown sys_fchown - 208 32 setresuid32 - sys_setresuid 208 64 setresuid sys_setresuid - -209 32 getresuid32 - compat_sys_getresuid +209 32 getresuid32 - sys_getresuid 209 64 getresuid sys_getresuid - 210 32 setresgid32 - sys_setresgid 210 64 setresgid sys_setresgid - -211 32 getresgid32 - compat_sys_getresgid +211 32 getresgid32 - sys_getresgid 211 64 getresgid sys_getresgid - -212 32 chown32 - compat_sys_chown +212 32 chown32 - sys_chown 212 64 chown sys_chown - 213 32 setuid32 - sys_setuid 213 64 setuid sys_setuid - @@ -225,166 +225,222 @@ 215 64 setfsuid sys_setfsuid - 216 32 setfsgid32 - sys_setfsgid 216 64 setfsgid sys_setfsgid - -217 common pivot_root sys_pivot_root compat_sys_pivot_root -218 common mincore sys_mincore compat_sys_mincore -219 common madvise sys_madvise compat_sys_madvise -220 common getdents64 sys_getdents64 compat_sys_getdents64 +217 common pivot_root sys_pivot_root sys_pivot_root +218 common mincore sys_mincore sys_mincore +219 common madvise sys_madvise sys_madvise +220 common getdents64 sys_getdents64 sys_getdents64 221 32 fcntl64 - compat_sys_fcntl64 222 common readahead sys_readahead compat_sys_s390_readahead 223 32 sendfile64 - 
compat_sys_sendfile64 -224 common setxattr sys_setxattr compat_sys_setxattr -225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr -226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr -227 common getxattr sys_getxattr compat_sys_getxattr -228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr -229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr -230 common listxattr sys_listxattr compat_sys_listxattr -231 common llistxattr sys_llistxattr compat_sys_llistxattr -232 common flistxattr sys_flistxattr compat_sys_flistxattr -233 common removexattr sys_removexattr compat_sys_removexattr -234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr -235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr +224 common setxattr sys_setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr sys_listxattr +231 common llistxattr sys_llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr sys_flistxattr +233 common removexattr sys_removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr sys_lremovexattr +235 common fremovexattr sys_fremovexattr sys_fremovexattr 236 common gettid sys_gettid sys_gettid 237 common tkill sys_tkill sys_tkill -238 common futex sys_futex compat_sys_futex +238 common futex sys_futex sys_futex_time32 239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 241 common tgkill sys_tgkill sys_tgkill 243 common io_setup sys_io_setup compat_sys_io_setup -244 common io_destroy sys_io_destroy compat_sys_io_destroy -245 common io_getevents sys_io_getevents compat_sys_io_getevents +244 common io_destroy sys_io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents 
sys_io_getevents_time32 246 common io_submit sys_io_submit compat_sys_io_submit -247 common io_cancel sys_io_cancel compat_sys_io_cancel +247 common io_cancel sys_io_cancel sys_io_cancel 248 common exit_group sys_exit_group sys_exit_group 249 common epoll_create sys_epoll_create sys_epoll_create -250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl -251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait -252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address +250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address sys_set_tid_address 253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime compat_sys_timer_settime -256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +255 common timer_settime sys_timer_settime sys_timer_settime32 +256 common timer_gettime sys_timer_gettime sys_timer_gettime32 257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun 258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime compat_sys_clock_settime -260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -261 common clock_getres sys_clock_getres compat_sys_clock_getres -262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +259 common clock_settime sys_clock_settime sys_clock_settime32 +260 common clock_gettime sys_clock_gettime sys_clock_gettime32 +261 common clock_getres sys_clock_getres sys_clock_getres_time32 +262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages +267 common remap_file_pages 
sys_remap_file_pages sys_remap_file_pages 268 common mbind sys_mbind compat_sys_mbind 269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy 270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open -272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +272 common mq_unlink sys_mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 275 common mq_notify sys_mq_notify compat_sys_mq_notify 276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 277 common kexec_load sys_kexec_load compat_sys_kexec_load -278 common add_key sys_add_key compat_sys_add_key -279 common request_key sys_request_key compat_sys_request_key +278 common add_key sys_add_key sys_add_key +279 common request_key sys_request_key sys_request_key 280 common keyctl sys_keyctl compat_sys_keyctl 281 common waitid sys_waitid compat_sys_waitid 282 common ioprio_set sys_ioprio_set sys_ioprio_set 283 common ioprio_get sys_ioprio_get sys_ioprio_get 284 common inotify_init sys_inotify_init sys_inotify_init -285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch +285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch 287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages 288 common openat sys_openat compat_sys_openat -289 common mkdirat sys_mkdirat compat_sys_mkdirat -290 common mknodat sys_mknodat compat_sys_mknodat -291 common fchownat sys_fchownat compat_sys_fchownat -292 common futimesat sys_futimesat compat_sys_futimesat +289 common mkdirat sys_mkdirat sys_mkdirat +290 common mknodat sys_mknodat sys_mknodat +291 common fchownat sys_fchownat 
sys_fchownat +292 common futimesat sys_futimesat sys_futimesat_time32 293 32 fstatat64 - compat_sys_s390_fstatat64 293 64 newfstatat sys_newfstatat - -294 common unlinkat sys_unlinkat compat_sys_unlinkat -295 common renameat sys_renameat compat_sys_renameat -296 common linkat sys_linkat compat_sys_linkat -297 common symlinkat sys_symlinkat compat_sys_symlinkat -298 common readlinkat sys_readlinkat compat_sys_readlinkat -299 common fchmodat sys_fchmodat compat_sys_fchmodat -300 common faccessat sys_faccessat compat_sys_faccessat -301 common pselect6 sys_pselect6 compat_sys_pselect6 -302 common ppoll sys_ppoll compat_sys_ppoll -303 common unshare sys_unshare compat_sys_unshare +294 common unlinkat sys_unlinkat sys_unlinkat +295 common renameat sys_renameat sys_renameat +296 common linkat sys_linkat sys_linkat +297 common symlinkat sys_symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat sys_fchmodat +300 common faccessat sys_faccessat sys_faccessat +301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +302 common ppoll sys_ppoll compat_sys_ppoll_time32 +303 common unshare sys_unshare sys_unshare 304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -306 common splice sys_splice compat_sys_splice +306 common splice sys_splice sys_splice 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range -308 common tee sys_tee compat_sys_tee +308 common tee sys_tee sys_tee 309 common vmsplice sys_vmsplice sys_vmsplice 310 common move_pages sys_move_pages compat_sys_move_pages -311 common getcpu sys_getcpu compat_sys_getcpu +311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes compat_sys_utimes +313 common utimes sys_utimes sys_utimes_time32 314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat 
sys_utimensat compat_sys_utimensat +315 common utimensat sys_utimensat sys_utimensat_time32 316 common signalfd sys_signalfd compat_sys_signalfd 317 common timerfd - - 318 common eventfd sys_eventfd sys_eventfd 319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 323 common eventfd2 sys_eventfd2 sys_eventfd2 324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 -325 common pipe2 sys_pipe2 compat_sys_pipe2 +325 common pipe2 sys_pipe2 sys_pipe2 326 common dup3 sys_dup3 sys_dup3 327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 328 common preadv sys_preadv compat_sys_preadv 329 common pwritev sys_pwritev compat_sys_pwritev 330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open +331 common perf_event_open sys_perf_event_open sys_perf_event_open 332 common fanotify_init sys_fanotify_init sys_fanotify_init 333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -334 common prlimit64 sys_prlimit64 compat_sys_prlimit64 -335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at +334 common prlimit64 sys_prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at 336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns 340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv 341 common 
process_vm_writev sys_process_vm_writev sys_process_vm_writev 342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr -343 common kcmp sys_kcmp compat_sys_kcmp -344 common finit_module sys_finit_module compat_sys_finit_module -345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr -346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr -347 common renameat2 sys_renameat2 compat_sys_renameat2 -348 common seccomp sys_seccomp compat_sys_seccomp -349 common getrandom sys_getrandom compat_sys_getrandom -350 common memfd_create sys_memfd_create compat_sys_memfd_create -351 common bpf sys_bpf compat_sys_bpf -352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write -353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read +343 common kcmp sys_kcmp sys_kcmp +344 common finit_module sys_finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 sys_renameat2 +348 common seccomp sys_seccomp sys_seccomp +349 common getrandom sys_getrandom sys_getrandom +350 common memfd_create sys_memfd_create sys_memfd_create +351 common bpf sys_bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read 354 common execveat sys_execveat compat_sys_execveat 355 common userfaultfd sys_userfaultfd sys_userfaultfd 356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 359 common socket sys_socket sys_socket -360 common socketpair sys_socketpair compat_sys_socketpair -361 common bind sys_bind compat_sys_bind -362 common connect sys_connect compat_sys_connect +360 common socketpair sys_socketpair 
sys_socketpair +361 common bind sys_bind sys_bind +362 common connect sys_connect sys_connect 363 common listen sys_listen sys_listen -364 common accept4 sys_accept4 compat_sys_accept4 +364 common accept4 sys_accept4 sys_accept4 365 common getsockopt sys_getsockopt sys_getsockopt 366 common setsockopt sys_setsockopt sys_setsockopt -367 common getsockname sys_getsockname compat_sys_getsockname -368 common getpeername sys_getpeername compat_sys_getpeername -369 common sendto sys_sendto compat_sys_sendto +367 common getsockname sys_getsockname sys_getsockname +368 common getpeername sys_getpeername sys_getpeername +369 common sendto sys_sendto sys_sendto 370 common sendmsg sys_sendmsg compat_sys_sendmsg 371 common recvfrom sys_recvfrom compat_sys_recvfrom 372 common recvmsg sys_recvmsg compat_sys_recvmsg 373 common shutdown sys_shutdown sys_shutdown -374 common mlock2 sys_mlock2 compat_sys_mlock2 -375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range +374 common mlock2 sys_mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range sys_copy_file_range 376 common preadv2 sys_preadv2 compat_sys_preadv2 377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 -378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage -379 common statx sys_statx compat_sys_statx -380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi +378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx sys_statx +380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +383 common rseq sys_rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free sys_pkey_free +# room for arch specific syscalls +392 64 semtimedop sys_semtimedop - +393 common semget 
sys_semget sys_semget +394 common semctl sys_semctl compat_sys_semctl +395 common shmget sys_shmget sys_shmget +396 common shmctl sys_shmctl compat_sys_shmctl +397 common shmat sys_shmat compat_sys_shmat +398 common shmdt sys_shmdt sys_shmdt +399 common msgget sys_msgget sys_msgget +400 common msgsnd sys_msgsnd compat_sys_msgsnd +401 common msgrcv sys_msgrcv compat_sys_msgrcv +402 common msgctl sys_msgctl compat_sys_msgctl +403 32 clock_gettime64 - sys_clock_gettime +404 32 clock_settime64 - sys_clock_settime +405 32 clock_adjtime64 - sys_clock_adjtime +406 32 clock_getres_time64 - sys_clock_getres +407 32 clock_nanosleep_time64 - sys_clock_nanosleep +408 32 timer_gettime64 - sys_timer_gettime +409 32 timer_settime64 - sys_timer_settime +410 32 timerfd_gettime64 - sys_timerfd_gettime +411 32 timerfd_settime64 - sys_timerfd_settime +412 32 utimensat_time64 - sys_utimensat +413 32 pselect6_time64 - compat_sys_pselect6_time64 +414 32 ppoll_time64 - compat_sys_ppoll_time64 +416 32 io_pgetevents_time64 - sys_io_pgetevents +417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 +418 32 mq_timedsend_time64 - sys_mq_timedsend +419 32 mq_timedreceive_time64 - sys_mq_timedreceive +420 32 semtimedop_time64 - sys_semtimedop +421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 +422 32 futex_time64 - sys_futex +423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree sys_open_tree +429 common move_mount sys_move_mount sys_move_mount +430 common fsopen sys_fsopen sys_fsopen +431 common fsconfig sys_fsconfig sys_fsconfig +432 common fsmount sys_fsmount sys_fsmount +433 common fspick sys_fspick sys_fspick +434 common pidfd_open sys_pidfd_open 
sys_pidfd_open +435 common clone3 sys_clone3 sys_clone3 +436 common close_range sys_close_range sys_close_range +437 common openat2 sys_openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 379819244b91..78672124d28b 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -362,6 +362,7 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 89c32692f40c..8cc24967bc27 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1186,65 +1186,67 @@ static struct option stat_options[] = { OPT_END() }; -static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_socket(map, cpu, NULL); } -static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_die(map, cpu, NULL); } -static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_core(map, cpu, NULL); } -static int perf_stat__get_node(struct 
perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int cpu) { return cpu_map__get_node(map, cpu, NULL); } -static int perf_stat__get_aggr(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) { int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx >= map->nr) - return -1; + return id; cpu = map->map[idx]; - if (config->cpus_aggr_map->map[cpu] == -1) + if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu])) config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); - return config->cpus_aggr_map->map[cpu]; + id = config->cpus_aggr_map->map[cpu]; + return id; } -static int perf_stat__get_socket_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } -static int perf_stat__get_die_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); } -static int perf_stat__get_core_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); } -static int perf_stat__get_node_cached(struct perf_stat_config *config, +static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_node, map, idx); @@ -1318,14 +1320,29 @@ static int perf_stat_init_aggr_mode(void) * the aggregation translate cpumap. 
*/ nr = perf_cpu_map__max(evsel_list->core.cpus); - stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1); + stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } +static void cpu_aggr_map__delete(struct cpu_aggr_map *map) +{ + if (map) { + WARN_ONCE(refcount_read(&map->refcnt) != 0, + "cpu_aggr_map refcnt unbalanced\n"); + free(map); + } +} + +static void cpu_aggr_map__put(struct cpu_aggr_map *map) +{ + if (map && refcount_dec_and_test(&map->refcnt)) + cpu_aggr_map__delete(map); +} + static void perf_stat__exit_aggr_mode(void) { - perf_cpu_map__put(stat_config.aggr_map); - perf_cpu_map__put(stat_config.cpus_aggr_map); + cpu_aggr_map__put(stat_config.aggr_map); + cpu_aggr_map__put(stat_config.cpus_aggr_map); stat_config.aggr_map = NULL; stat_config.cpus_aggr_map = NULL; } @@ -1345,117 +1362,108 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *m return cpu; } -static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; int cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - return cpu == -1 ? -1 : env->cpu[cpu].socket_id; + if (cpu != -1) + id.socket = env->cpu[cpu].socket_id; + + return id; } -static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + int cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { /* - * Encode socket in bit range 15:8 - * die_id is relative to socket, - * we need a global id. 
So we combine - * socket + die id + * die_id is relative to socket, so start + * with the socket ID and then add die to + * make a unique ID. */ - if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return -1; - - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return -1; - - die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + id.socket = env->cpu[cpu].socket_id; + id.die = env->cpu[cpu].die_id; } - return die_id; + return id; } -static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - int core = -1, cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + int cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { /* - * Encode socket in bit range 31:24 - * encode die id in bit range 23:16 * core_id is relative to socket and die, - * we need a global id. So we combine - * socket + die id + core id + * we need a global id. 
So we set + * socket, die id and core id */ - if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return -1; - - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) - return -1; - - if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) - return -1; - - core = (env->cpu[cpu].socket_id << 24) | - (env->cpu[cpu].die_id << 16) | - (env->cpu[cpu].core_id & 0xffff); + id.socket = env->cpu[cpu].socket_id; + id.die = env->cpu[cpu].die_id; + id.core = env->cpu[cpu].core_id; } - return core; + return id; } -static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) { int cpu = perf_env__get_cpu(data, map, idx); + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - return perf_env__numa_node(data, cpu); + id.node = perf_env__numa_node(data, cpu); + return id; } static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **sockp) + struct cpu_aggr_map **sockp) { return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); } static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **diep) + struct cpu_aggr_map **diep) { return cpu_map__build_map(cpus, diep, perf_env__get_die, env); } static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **corep) + struct cpu_aggr_map **corep) { return cpu_map__build_map(cpus, corep, perf_env__get_core, env); } static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct perf_cpu_map **nodep) + struct cpu_aggr_map **nodep) { return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); } -static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config 
__maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_die(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_core(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, +static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu_map *map, int idx) { return perf_env__get_node(map, idx, &perf_stat.session->header.env); diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 15ecb1803fb9..dded93a2bc89 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -144,6 +144,8 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl +check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl +check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl for i in $BEAUTY_FILES; do beauty_check $i -B diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 165feedc7863..74748ed75b2c 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -65,9 +65,11 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) .mode = PERF_DATA_MODE_READ, }; int i; + struct aggr_cpu_id id; session = perf_session__new(&data, 
false, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); + cpu__setup_cpunode_map(); /* On platforms with large numbers of CPUs process_cpu_topology() * might issue an error while reading the perf.data file section @@ -85,11 +87,18 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * "socket_id number is too big. You may need to upgrade the * perf tool." * - * This is the reason why this test might be skipped. + * This is the reason why this test might be skipped. aarch64 and + * s390 always write this part of the header, even when the above + * condition is true (see do_core_id_test in header.c). So always + * run this test on those platforms. */ - if (!session->header.env.cpu) + if (!session->header.env.cpu + && strncmp(session->header.env.arch, "s390", 4) + && strncmp(session->header.env.arch, "aarch64", 7)) return TEST_SKIP; + TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); + for (i = 0; i < session->header.env.nr_cpus_avail; i++) { if (!cpu_map__has(map, i)) continue; @@ -98,14 +107,57 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[i].socket_id); } + // Test that core ID contains socket, die and core for (i = 0; i < map->nr; i++) { - TEST_ASSERT_VAL("Core ID doesn't match", - (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff))); + id = cpu_map__get_core(map, i, NULL); + TEST_ASSERT_VAL("Core map - Core ID doesn't match", + session->header.env.cpu[map->map[i]].core_id == id.core); - TEST_ASSERT_VAL("Socket ID doesn't match", - (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL))); + TEST_ASSERT_VAL("Core map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == id.socket); + + TEST_ASSERT_VAL("Core map - Die ID doesn't match", + session->header.env.cpu[map->map[i]].die_id == id.die); + TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); + 
TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1); } + // Test that die ID contains socket and die + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_die(map, i, NULL); + TEST_ASSERT_VAL("Die map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == id.socket); + + TEST_ASSERT_VAL("Die map - Die ID doesn't match", + session->header.env.cpu[map->map[i]].die_id == id.die); + + TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1); + } + + // Test that socket ID contains only socket + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_socket(map, i, NULL); + TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", + session->header.env.cpu[map->map[i]].socket_id == id.socket); + + TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); + TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1); + } + + // Test that node ID contains only node + for (i = 0; i < map->nr; i++) { + id = cpu_map__get_node(map, i, NULL); + TEST_ASSERT_VAL("Node map - Node ID doesn't match", + cpu__get_node(map->map[i]) == id.node); + TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); + TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); + TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1); + } perf_session__delete(session); return 0; diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh index 831c02cf0586..27ee1ea1fe94 100755 --- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh @@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h printf "static const char *x86_MSRs[] = {\n" 
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*' -egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \ +egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSCDEADLINE|UCODE_REV)|IDT_FCR4)' | \ sed -r "s/$regex/\2 \1/g" | sort -n | \ xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n" diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index dc5c5e6fc502..87d3eca9b872 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -95,6 +95,23 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) return cpus; } +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) +{ + struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr); + + if (cpus != NULL) { + int i; + + cpus->nr = nr; + for (i = 0; i < nr; i++) + cpus->map[i] = cpu_map__empty_aggr_cpu_id(); + + refcount_set(&cpus->refcnt, 1); + } + + return cpus; +} + static int cpu__get_topology_int(int cpu, const char *name, int *value) { char path[PATH_MAX]; @@ -111,40 +128,57 @@ int cpu_map__get_socket_id(int cpu) return ret ?: value; } -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, + void *data __maybe_unused) { int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; - return cpu_map__get_socket_id(cpu); + id.socket = cpu_map__get_socket_id(cpu); + return id; } -static int cmp_ids(const void *a, const void *b) +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) { - return *(int *)a - *(int *)b; + struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; + struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; + + if (a->node != b->node) + return a->node - b->node; + else if (a->socket != b->socket) + return a->socket - 
b->socket; + else if (a->die != b->die) + return a->die - b->die; + else if (a->core != b->core) + return a->core - b->core; + else + return a->thread - b->thread; } -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, - int (*f)(struct perf_cpu_map *map, int cpu, void *data), +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, + struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data) { - struct perf_cpu_map *c; int nr = cpus->nr; - int cpu, s1, s2; + struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr); + int cpu, s2; + struct aggr_cpu_id s1; - /* allocate as much as possible */ - c = calloc(1, sizeof(*c) + nr * sizeof(int)); if (!c) return -1; + /* Reset size as it may only be partially filled */ + c->nr = 0; + for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { - if (s1 == c->map[s2]) + if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2])) break; } if (s2 == c->nr) { @@ -153,9 +187,8 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, } } /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(int), cmp_ids); + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id); - refcount_set(&c->refcnt, 1); *res = c; return 0; } @@ -167,37 +200,32 @@ int cpu_map__get_die_id(int cpu) return ret ?: value; } -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) { - int cpu, die_id, s; + int cpu, die; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; - die_id = cpu_map__get_die_id(cpu); + die = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. 
*/ - if (die_id == -1) - die_id = 0; - - s = cpu_map__get_socket(map, idx, data); - if (s == -1) - return -1; + if (die == -1) + die = 0; /* - * Encode socket in bit range 15:8 - * die_id is relative to socket, and - * we need a global id. So we combine - * socket + die id + * die_id is relative to socket, so start + * with the socket ID and then add die to + * make a unique ID. */ - if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n")) - return -1; + id = cpu_map__get_socket(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) + return id; - if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) - return -1; - - return (s << 8) | (die_id & 0xff); + id.die = die; + return id; } int cpu_map__get_core_id(int cpu) @@ -211,59 +239,58 @@ int cpu_map__get_node_id(int cpu) return cpu__get_node(cpu); } -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) { - int cpu, s_die; + int cpu; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) - return -1; + return id; cpu = map->map[idx]; cpu = cpu_map__get_core_id(cpu); - /* s_die is the combination of socket + die id */ - s_die = cpu_map__get_die(map, idx, data); - if (s_die == -1) - return -1; + /* cpu_map__get_die returns a struct with socket and die set*/ + id = cpu_map__get_die(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) + return id; /* - * encode socket in bit range 31:24 - * encode die id in bit range 23:16 - * core_id is relative to socket and die, - * we need a global id. So we combine - * socket + die id + core id + * core_id is relative to socket and die, we need a global id. 
+ * So we combine the result from cpu_map__get_die with the core id */ - if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) - return -1; - - return (s_die << 16) | (cpu & 0xffff); + id.core = cpu; + return id; } -int cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) { - if (idx < 0 || idx >= map->nr) - return -1; + struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - return cpu_map__get_node_id(map->map[idx]); + if (idx < 0 || idx >= map->nr) + return id; + + id.node = cpu_map__get_node_id(map->map[idx]); + return id; } -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp) +int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) { return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep) +int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) { return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); } -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep) +int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) { return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); } -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct perf_cpu_map **numap) +int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap) { return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL); } @@ -586,3 +613,33 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ return online; } + +bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) +{ + return a.thread == b.thread && + a.node == b.node && + a.socket == b.socket && + a.die == b.die && + a.core == b.core; +} + +bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id 
a) +{ + return a.thread == -1 && + a.node == -1 && + a.socket == -1 && + a.die == -1 && + a.core == -1; +} + +struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) +{ + struct aggr_cpu_id ret = { + .thread = -1, + .node = -1, + .socket = -1, + .die = -1, + .core = -1 + }; + return ret; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 3a442f021468..a27eeaf086e8 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -7,25 +7,41 @@ #include #include +struct aggr_cpu_id { + int thread; + int node; + int socket; + int die; + int core; +}; + +struct cpu_aggr_map { + refcount_t refcnt; + int nr; + struct aggr_cpu_id map[]; +}; + struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); + struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_die_id(int cpu); -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_core_id(int cpu); -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); int cpu_map__get_node_id(int cpu); -int cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp); -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep); -int 
cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep); -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct perf_cpu_map **nodep); +struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); +int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); +int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); +int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); +int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) @@ -35,21 +51,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) return sock->map[s]; } -static inline int cpu_map__id_to_socket(int id) -{ - return id >> 24; -} - -static inline int cpu_map__id_to_die(int id) -{ - return (id >> 16) & 0xff; -} - -static inline int cpu_map__id_to_cpu(int id) -{ - return id & 0xffff; -} - int cpu__setup_cpunode_map(void); int cpu__max_node(void); @@ -57,11 +58,15 @@ int cpu__max_cpu(void); int cpu__max_present_cpu(void); int cpu__get_node(int cpu); -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, - int (*f)(struct perf_cpu_map *map, int cpu, void *data), +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, + struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), void *data); int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); +bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b); +bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a); +struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void); + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 064b63a6a3f3..bbecb449ea94 100644 --- 
a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -791,7 +791,7 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, const char *sdtgrp) { struct strbuf buf; - char *ret = NULL, **args; + char *ret = NULL; int i, args_count, err; unsigned long long ref_ctr_offset; @@ -813,12 +813,19 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, goto out; if (note->args) { - args = argv_split(note->args, &args_count); + char **args = argv_split(note->args, &args_count); + + if (args == NULL) + goto error; for (i = 0; i < args_count; ++i) { - if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) + if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) { + argv_free(args); goto error; + } } + + argv_free(args); } out: diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index fee7543843a8..583ae4f09c5d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -68,15 +68,15 @@ static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) static void aggr_printout(struct perf_stat_config *config, - struct evsel *evsel, int id, int nr) + struct evsel *evsel, struct aggr_cpu_id id, int nr) { switch (config->aggr_mode) { case AGGR_CORE: fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - cpu_map__id_to_socket(id), - cpu_map__id_to_die(id), + id.socket, + id.die, config->csv_output ? 0 : -8, - cpu_map__id_to_cpu(id), + id.core, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -84,9 +84,9 @@ static void aggr_printout(struct perf_stat_config *config, break; case AGGR_DIE: fprintf(config->output, "S%d-D%*d%s%*d%s", - cpu_map__id_to_socket(id << 16), + id.socket, config->csv_output ? 0 : -8, - cpu_map__id_to_die(id << 16), + id.die, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -95,7 +95,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", config->csv_output ? 
0 : -5, - id, + id.socket, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -104,7 +104,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NODE: fprintf(config->output, "N%*d%s%*d%s", config->csv_output ? 0 : -5, - id, + id.node, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -113,23 +113,23 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", - cpu_map__id_to_socket(id), - cpu_map__id_to_die(id), + id.socket, + id.die, config->csv_output ? 0 : -3, - cpu_map__id_to_cpu(id), config->csv_sep); - } else if (id > -1) { + id.core, config->csv_sep); + } else if (id.core > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - evsel__cpus(evsel)->map[id], + evsel__cpus(evsel)->map[id.core], config->csv_sep); } break; case AGGR_THREAD: fprintf(config->output, "%*s-%*d%s", config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id), + perf_thread_map__comm(evsel->core.threads, id.thread), config->csv_output ? 
0 : -8, - perf_thread_map__pid(evsel->core.threads, id), + perf_thread_map__pid(evsel->core.threads, id.thread), config->csv_sep); break; case AGGR_GLOBAL: @@ -144,7 +144,8 @@ struct outstate { bool newline; const char *prefix; int nfields; - int id, nr; + int nr; + struct aggr_cpu_id id; struct evsel *evsel; }; @@ -319,13 +320,13 @@ static void print_metric_header(struct perf_stat_config *config, } static int first_shadow_cpu(struct perf_stat_config *config, - struct evsel *evsel, int id) + struct evsel *evsel, struct aggr_cpu_id id) { struct evlist *evlist = evsel->evlist; int i; if (config->aggr_mode == AGGR_NONE) - return id; + return id.core; if (!config->aggr_get_id) return 0; @@ -333,14 +334,17 @@ static int first_shadow_cpu(struct perf_stat_config *config, for (i = 0; i < evsel__nr_cpus(evsel); i++) { int cpu2 = evsel__cpus(evsel)->map[i]; - if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id) + if (cpu_map__compare_aggr_cpu_id( + config->aggr_get_id(config, evlist->core.cpus, cpu2), + id)) { return cpu2; + } } return 0; } static void abs_printout(struct perf_stat_config *config, - int id, int nr, struct evsel *evsel, double avg) + struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) { FILE *output = config->output; double sc = evsel->scale; @@ -393,7 +397,7 @@ static bool is_mixed_hw_group(struct evsel *counter) return false; } -static void printout(struct perf_stat_config *config, int id, int nr, +static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st) @@ -496,7 +500,8 @@ static void printout(struct perf_stat_config *config, int id, int nr, static void aggr_update_shadow(struct perf_stat_config *config, struct evlist *evlist) { - int cpu, s2, id, s; + int cpu, s; + struct aggr_cpu_id s2, id; u64 val; struct evsel *counter; @@ -506,7 +511,7 @@ static void aggr_update_shadow(struct perf_stat_config 
*config, val = 0; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); - if (s2 != id) + if (!cpu_map__compare_aggr_cpu_id(s2, id)) continue; val += perf_counts(counter->counts, cpu, 0)->val; } @@ -584,7 +589,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter, struct aggr_data { u64 ena, run, val; - int id; + struct aggr_cpu_id id; int nr; int cpu; }; @@ -593,13 +598,14 @@ static void aggr_cb(struct perf_stat_config *config, struct evsel *counter, void *data, bool first) { struct aggr_data *ad = data; - int cpu, s2; + int cpu; + struct aggr_cpu_id s2; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct perf_counts_values *counts; s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); - if (s2 != ad->id) + if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) continue; if (first) ad->nr++; @@ -628,7 +634,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_data ad; FILE *output = config->output; u64 ena, run, val; - int id, nr; + int nr; + struct aggr_cpu_id id; double uval; ad.id = id = config->aggr_map->map[s]; @@ -649,8 +656,12 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, cpu != -1 ? 
cpu : id, nr, counter, uval, prefix, - run, ena, 1.0, &rt_stat); + if (cpu != -1) { + id = cpu_map__empty_aggr_cpu_id(); + id.core = cpu; + } + printout(config, id, nr, counter, uval, + prefix, run, ena, 1.0, &rt_stat); if (!metric_only) fputc('\n', output); } @@ -728,7 +739,8 @@ static struct perf_aggr_thread_value *sort_aggr_thread( continue; buf[i].counter = counter; - buf[i].id = thread; + buf[i].id = cpu_map__empty_aggr_cpu_id(); + buf[i].id.thread = thread; buf[i].uval = uval; buf[i].val = val; buf[i].run = run; @@ -751,7 +763,8 @@ static void print_aggr_thread(struct perf_stat_config *config, FILE *output = config->output; int nthreads = perf_thread_map__nr(counter->core.threads); int ncpus = perf_cpu_map__nr(counter->core.cpus); - int thread, sorted_threads, id; + int thread, sorted_threads; + struct aggr_cpu_id id; struct perf_aggr_thread_value *buf; buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); @@ -768,7 +781,7 @@ static void print_aggr_thread(struct perf_stat_config *config, if (config->stats) printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, - &config->stats[id]); + &config->stats[id.thread]); else printout(config, id, 0, buf[thread].counter, buf[thread].uval, prefix, buf[thread].run, buf[thread].ena, 1.0, @@ -814,8 +827,8 @@ static void print_counter_aggr(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, - cd.avg, &rt_stat); + printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running, + cd.avg_enabled, cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); } @@ -842,6 +855,7 @@ static void print_counter(struct perf_stat_config *config, u64 ena, run, val; double uval; int cpu; + struct aggr_cpu_id id; for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; @@ -856,8 
+870,10 @@ static void print_counter(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + id = cpu_map__empty_aggr_cpu_id(); + id.core = cpu; + printout(config, id, 0, counter, uval, prefix, + run, ena, 1.0, &rt_stat); fputc('\n', output); } @@ -872,6 +888,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evsel *counter; u64 ena, run, val; double uval; + struct aggr_cpu_id id; nrcpus = evlist->core.cpus->nr; for (cpu = 0; cpu < nrcpus; cpu++) { @@ -880,8 +897,10 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { + id = cpu_map__empty_aggr_cpu_id(); + id.core = cpu; if (first) { - aggr_printout(config, counter, cpu, 0); + aggr_printout(config, counter, id, 0); first = false; } val = perf_counts(counter->counts, cpu, 0)->val; @@ -889,8 +908,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + printout(config, id, 0, counter, uval, prefix, + run, ena, 1.0, &rt_stat); } fputc('\n', config->output); } @@ -1140,14 +1159,15 @@ static void print_footer(struct perf_stat_config *config) static void print_percore_thread(struct perf_stat_config *config, struct evsel *counter, char *prefix) { - int s, s2, id; + int s; + struct aggr_cpu_id s2, id; bool first = true; for (int i = 0; i < evsel__nr_cpus(counter); i++) { s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; - if (s2 == id) + if (cpu_map__compare_aggr_cpu_id(s2, id)) break; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1e125e39ff84..8ce1479c98f0 100644 --- a/tools/perf/util/stat.c +++ 
b/tools/perf/util/stat.c @@ -313,7 +313,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL); + s = cpu_map__get_socket(cpus, cpu, NULL).socket; if (s < 0) return -1; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 9979b4b100f2..b5369730b4a2 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -6,6 +6,7 @@ #include #include #include +#include "cpumap.h" #include "rblist.h" struct perf_cpu_map; @@ -99,7 +100,7 @@ struct runtime_stat { struct rblist value_list; }; -typedef int (*aggr_get_id_t)(struct perf_stat_config *config, +typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu_map *m, int cpu); struct perf_stat_config { @@ -138,9 +139,9 @@ struct perf_stat_config { const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; - struct perf_cpu_map *aggr_map; + struct cpu_aggr_map *aggr_map; aggr_get_id_t aggr_get_id; - struct perf_cpu_map *cpus_aggr_map; + struct cpu_aggr_map *cpus_aggr_map; u64 *walltime_run; struct rblist metric_events; int ctl_fd; @@ -170,7 +171,7 @@ struct evlist; struct perf_aggr_thread_value { struct evsel *counter; - int id; + struct aggr_cpu_id id; double uval; u64 val; u64 run; diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index 04d563fc9b95..468435ed64e6 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -16,6 +16,16 @@ # define mb() abort() # define dma_rmb() abort() # define dma_wmb() abort() +#elif defined(__aarch64__) +#define dmb(opt) asm volatile("dmb " #opt : : : "memory") +#define virt_mb() __sync_synchronize() +#define virt_rmb() dmb(ishld) +#define virt_wmb() dmb(ishst) +#define virt_store_mb(var, value) do { WRITE_ONCE(var, value); dmb(ish); } while (0) +/* Weak barriers should be used. 
If not - it's a bug */ +# define mb() abort() +# define dma_rmb() abort() +# define dma_wmb() abort() #else #error Please fill in barrier macros #endif diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h index b14c2c3b6b85..813baf13f62a 100644 --- a/tools/virtio/linux/bug.h +++ b/tools/virtio/linux/bug.h @@ -2,6 +2,8 @@ #ifndef BUG_H #define BUG_H +#include + #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) #define BUILD_BUG_ON(x) diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 315e85cabeda..0b493542e61a 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr) # define unlikely(x) (__builtin_expect(!!(x), 0)) # endif +static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, gfp); +} + #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #ifdef DEBUG #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) @@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0) - #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \