diff --git a/.mailmap b/.mailmap index 2216b5d5c84e..f877262b0be9 100644 --- a/.mailmap +++ b/.mailmap @@ -20,6 +20,7 @@ Adam Oldham Adam Radford Adriana Reus Adrian Bunk +Ajay Kaher Akhil P Oommen Alan Cox Alan Cox @@ -36,6 +37,7 @@ Alexei Avshalom Lazar Alexei Starovoitov Alexei Starovoitov Alexei Starovoitov +Alexey Makhalov Alex Hung Alex Shi Alex Shi @@ -110,6 +112,7 @@ Brendan Higgins Brian Avery Brian King Brian Silverman +Bryan Tan Cai Huoqing Can Guo Carl Huang @@ -340,7 +343,8 @@ Lee Jones Lee Jones Lee Jones Lee Jones -Leonard Crestez Leonard Crestez +Leonard Crestez +Leonard Crestez Leonardo Bras Leonard Göhrs Leonid I Ananiev @@ -442,7 +446,8 @@ Mythri P K Nadav Amit Nadav Amit Nadia Yvette Chambers William Lee Irwin III -Naoya Horiguchi +Naoya Horiguchi +Naoya Horiguchi Nathan Chancellor Neeraj Upadhyay Neil Armstrong @@ -497,7 +502,8 @@ Prasad Sodagudi Punit Agrawal Qais Yousef Qais Yousef -Quentin Monnet +Quentin Monnet +Quentin Monnet Quentin Perret Rafael J. Wysocki Rajeev Nandan @@ -519,6 +525,7 @@ Rémi Denis-Courmont Ricardo Ribalda Ricardo Ribalda Ricardo Ribalda Delgado Ricardo Ribalda +Richard Genoud Richard Leitner Richard Leitner Richard Leitner @@ -527,6 +534,7 @@ Rocky Liao Roman Gushchin Roman Gushchin Roman Gushchin +Ronak Doshi Muchun Song Muchun Song Ross Zwisler @@ -649,6 +657,7 @@ Viresh Kumar Viresh Kumar Viresh Kumar Viresh Kumar +Vishnu Dasa Vivek Aknurwar Vivien Didelot Vlad Dogaru diff --git a/CREDITS b/CREDITS index c55c5a0ee4ff..0107047f807b 100644 --- a/CREDITS +++ b/CREDITS @@ -3146,6 +3146,10 @@ S: Triftstra=DFe 55 S: 13353 Berlin S: Germany +N: Gustavo Pimental +E: gustavo.pimentel@synopsys.com +D: PCI driver for Synopsys DesignWare + N: Emanuel Pirker E: epirker@edu.uni-klu.ac.at D: AIC5800 IEEE 1394, RAW I/O on 1394 diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index cce768afec6b..25a04cda4c2c 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically, the BHB might be shared across privilege levels even in the presence of Enhanced IBRS. -Currently the only known real-world BHB attack vector is via -unprivileged eBPF. Therefore, it's highly recommended to not enable -unprivileged eBPF, especially when eIBRS is used (without retpolines). -For a full mitigation against BHB attacks, it's recommended to use -retpolines (or eIBRS combined with retpolines). +Previously the only known real-world BHB attack vector was via unprivileged +eBPF. Further research has found attacks that don't require unprivileged eBPF. +For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or +use the BHB clearing sequence. Attack scenarios ---------------- @@ -430,6 +429,23 @@ The possible values in this file are: 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB =========================== ======================================================= + - Branch History Injection (BHI) protection status: + +.. list-table:: + + * - BHI: Not affected + - System is not affected + * - BHI: Retpoline + - System is protected by retpoline + * - BHI: BHI_DIS_S + - System is protected by BHI_DIS_S + * - BHI: SW loop, KVM SW loop + - System is protected by software clearing sequence + * - BHI: Vulnerable + - System is vulnerable to BHI + * - BHI: Vulnerable, KVM: SW loop + - System is vulnerable; KVM is protected by software clearing sequence + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. @@ -484,7 +500,11 @@ Spectre variant 2 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at boot, by setting the IBRS bit, and they're automatically protected against - Spectre v2 variant attacks. + some Spectre v2 variant attacks. The BHB can still influence the choice of + indirect branch predictor entry, and although branch predictor entries are + isolated between modes when eIBRS is enabled, the BHB itself is not isolated + between modes. Systems which support BHI_DIS_S will set it to protect against + BHI attacks. On Intel's enhanced IBRS systems, this includes cross-thread branch target injections on SMT systems (STIBP). In other words, Intel eIBRS enables @@ -638,6 +658,18 @@ kernel command line. spectre_v2=off. Spectre variant 1 mitigations cannot be disabled. + spectre_bhi= + + [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. This setting affects the deployment + of the HW BHI control and the SW BHB clearing sequence. + + on + (default) Enable the HW or SW mitigation as + needed. + off + Disable the mitigation. + For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt Mitigation selection guide diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bb884c14b2f6..902ecd92a29f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3444,6 +3444,7 @@ retbleed=off [X86] spec_rstack_overflow=off [X86] spec_store_bypass_disable=off [X86,PPC] + spectre_bhi=off [X86] spectre_v2_user=off [X86] srbds=off [X86,INTEL] ssbd=force-off [ARM64] @@ -6063,6 +6064,15 @@ sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/admin-guide/laptops/sonypi.rst + spectre_bhi= [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. This setting affects the + deployment of the HW BHI control and the SW BHB + clearing sequence. + + on - (default) Enable the HW or SW mitigation + as needed. + off - Disable the mitigation. + spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. The default operation protects the kernel from @@ -6599,7 +6609,7 @@ To turn off having tracepoints sent to printk, echo 0 > /proc/sys/kernel/tracepoint_printk Note, echoing 1 into this file without the - tracepoint_printk kernel cmdline option has no effect. + tp_printk kernel cmdline option has no effect. The tp_printk_stop_on_boot (see below) can also be used to stop the printing of events to console at diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst index b42132969e31..13632671adae 100644 --- a/Documentation/admin-guide/mm/zswap.rst +++ b/Documentation/admin-guide/mm/zswap.rst @@ -155,7 +155,7 @@ Setting this parameter to 100 will disable the hysteresis. Some users cannot tolerate the swapping that comes with zswap store failures and zswap writebacks. Swapping can be disabled entirely (without disabling -zswap itself) on a cgroup-basis as follows: +zswap itself) on a cgroup-basis as follows:: echo 0 > /sys/fs/cgroup//memory.zswap.writeback @@ -166,7 +166,7 @@ writeback (because the same pages might be rejected again and again). When there is a sizable amount of cold memory residing in the zswap pool, it can be advantageous to proactively write these cold pages to swap and reclaim the memory for other use cases. By default, the zswap shrinker is disabled. -User can enable it as follows: +User can enable it as follows:: echo Y > /sys/module/zswap/parameters/shrinker_enabled diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 3712d81cb50c..6c245582d8fb 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -574,7 +574,7 @@ Memory b/w domain is L3 cache. MB:=bandwidth0;=bandwidth1;... Memory bandwidth Allocation specified in MiBps ---------------------------------------------- +---------------------------------------------- Memory bandwidth domain is L3 cache. :: diff --git a/Documentation/dev-tools/testing-overview.rst b/Documentation/dev-tools/testing-overview.rst index 0aaf6ea53608..1619e5e5cc9c 100644 --- a/Documentation/dev-tools/testing-overview.rst +++ b/Documentation/dev-tools/testing-overview.rst @@ -104,6 +104,8 @@ Some of these tools are listed below: KASAN and can be used in production. See Documentation/dev-tools/kfence.rst * lockdep is a locking correctness validator. See Documentation/locking/lockdep-design.rst +* Runtime Verification (RV) supports checking specific behaviours for a given + subsystem. See Documentation/trace/rv/runtime-verification.rst * There are several other pieces of debug instrumentation in the kernel, many of which can be found in lib/Kconfig.debug diff --git a/Documentation/devicetree/bindings/clock/keystone-gate.txt b/Documentation/devicetree/bindings/clock/keystone-gate.txt index c5aa187026e3..43f6fb6c9392 100644 --- a/Documentation/devicetree/bindings/clock/keystone-gate.txt +++ b/Documentation/devicetree/bindings/clock/keystone-gate.txt @@ -1,5 +1,3 @@ -Status: Unstable - ABI compatibility may be broken in the future - Binding for Keystone gate control driver which uses PSC controller IP. This binding uses the common clock binding[1]. diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt index 9a3fbc665606..69b0eb7c03c9 100644 --- a/Documentation/devicetree/bindings/clock/keystone-pll.txt +++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt @@ -1,5 +1,3 @@ -Status: Unstable - ABI compatibility may be broken in the future - Binding for keystone PLLs. The main PLL IP typically has a multiplier, a divider and a post divider. The additional PLL IPs like ARMPLL, DDRPLL and PAPLL are controlled by the memory mapped register where as the Main diff --git a/Documentation/devicetree/bindings/clock/ti/adpll.txt b/Documentation/devicetree/bindings/clock/ti/adpll.txt index 4c8a2ce2cd70..3122360adcf3 100644 --- a/Documentation/devicetree/bindings/clock/ti/adpll.txt +++ b/Documentation/devicetree/bindings/clock/ti/adpll.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments ADPLL clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped ADPLL with two to three selectable input clocks and three to four children. diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt index ade4dd4c30f0..bbd505c1199d 100644 --- a/Documentation/devicetree/bindings/clock/ti/apll.txt +++ b/Documentation/devicetree/bindings/clock/ti/apll.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments APLL clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped APLL with usually two selectable input clocks (reference clock and bypass clock), with analog phase locked diff --git a/Documentation/devicetree/bindings/clock/ti/autoidle.txt b/Documentation/devicetree/bindings/clock/ti/autoidle.txt index 7c735dde9fe9..05645a10a9e3 100644 --- a/Documentation/devicetree/bindings/clock/ti/autoidle.txt +++ b/Documentation/devicetree/bindings/clock/ti/autoidle.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments autoidle clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register mapped clock which can be put to idle automatically by hardware based on the usage and a configuration bit setting. Autoidle clock is never an individual diff --git a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt index 9c6199249ce5..edf0b5d42768 100644 --- a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt +++ b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments clockdomain. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1] in consumer role. Every clock on TI SoC belongs to one clockdomain, but software only needs this information for specific clocks which require diff --git a/Documentation/devicetree/bindings/clock/ti/composite.txt b/Documentation/devicetree/bindings/clock/ti/composite.txt index 33ac7c9ad053..6f7e1331b546 100644 --- a/Documentation/devicetree/bindings/clock/ti/composite.txt +++ b/Documentation/devicetree/bindings/clock/ti/composite.txt @@ -1,7 +1,5 @@ Binding for TI composite clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped composite clock with multiple different sub-types; diff --git a/Documentation/devicetree/bindings/clock/ti/divider.txt b/Documentation/devicetree/bindings/clock/ti/divider.txt index 9b13b32974f9..4d7c76f0b356 100644 --- a/Documentation/devicetree/bindings/clock/ti/divider.txt +++ b/Documentation/devicetree/bindings/clock/ti/divider.txt @@ -1,7 +1,5 @@ Binding for TI divider clock -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped adjustable clock rate divider that does not gate and has only one input clock or parent. By default the value programmed into diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt index 37a7cb6ad07d..14a1b72c2e71 100644 --- a/Documentation/devicetree/bindings/clock/ti/dpll.txt +++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments DPLL clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped DPLL with usually two selectable input clocks (reference clock and bypass clock), with digital phase locked diff --git a/Documentation/devicetree/bindings/clock/ti/fapll.txt b/Documentation/devicetree/bindings/clock/ti/fapll.txt index c19b3f253b8c..88986ef39ddd 100644 --- a/Documentation/devicetree/bindings/clock/ti/fapll.txt +++ b/Documentation/devicetree/bindings/clock/ti/fapll.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments FAPLL clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped FAPLL with usually two selectable input clocks (reference clock and bypass clock), and one or more child diff --git a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt index 518e3c142276..dc69477b6e98 100644 --- a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt +++ b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt @@ -1,7 +1,5 @@ Binding for TI fixed factor rate clock sources. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1], and also uses the autoidle support from TI autoidle clock [2]. diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt index 4982615c01b9..a8e0335b006a 100644 --- a/Documentation/devicetree/bindings/clock/ti/gate.txt +++ b/Documentation/devicetree/bindings/clock/ti/gate.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments gate clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. This clock is quite much similar to the basic gate-clock [2], however, it supports a number of additional features. If no register diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt index d3eb5ca92a7f..85fb1f2d2d28 100644 --- a/Documentation/devicetree/bindings/clock/ti/interface.txt +++ b/Documentation/devicetree/bindings/clock/ti/interface.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments interface clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. This clock is quite much similar to the basic gate-clock [2], however, it supports a number of additional features, including diff --git a/Documentation/devicetree/bindings/clock/ti/mux.txt b/Documentation/devicetree/bindings/clock/ti/mux.txt index b33f641f1043..cd56d3c1c09f 100644 --- a/Documentation/devicetree/bindings/clock/ti/mux.txt +++ b/Documentation/devicetree/bindings/clock/ti/mux.txt @@ -1,7 +1,5 @@ Binding for TI mux clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped multiplexer with multiple input clock signals or parents, one of which can be selected as output. This clock does not diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml index c0d6a4fdff97..e6dc5494baee 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml @@ -53,6 +53,15 @@ patternProperties: compatible: const: qcom,sm8150-dpu + "^displayport-controller@[0-9a-f]+$": + type: object + additionalProperties: true + + properties: + compatible: + contains: + const: qcom,sm8150-dp + "^dsi@[0-9a-f]+$": type: object additionalProperties: true diff --git a/Documentation/devicetree/bindings/dts-coding-style.rst b/Documentation/devicetree/bindings/dts-coding-style.rst index a9bdd2b59dca..8a68331075a0 100644 --- a/Documentation/devicetree/bindings/dts-coding-style.rst +++ b/Documentation/devicetree/bindings/dts-coding-style.rst @@ -144,6 +144,8 @@ Example:: #dma-cells = <1>; clocks = <&clock_controller 0>, <&clock_controller 1>; clock-names = "bus", "host"; + #address-cells = <1>; + #size-cells = <1>; vendor,custom-property = <2>; status = "disabled"; diff --git a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml index 528ef3572b62..055a3351880b 100644 --- a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml @@ -94,6 +94,10 @@ properties: local-bd-address: true + qcom,local-bd-address-broken: + type: boolean + description: + boot firmware is incorrectly passing the address in big-endian order required: - compatible diff --git a/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml b/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml index afcdeed4e88a..bc813fe74fab 100644 --- a/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml +++ b/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml @@ -52,6 +52,9 @@ properties: - const: main - const: mm + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt b/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt index 25f8658e216f..48a49c516b62 100644 --- a/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt +++ b/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt @@ -1,9 +1,6 @@ TI Davinci DSP devices ======================= -Binding status: Unstable - Subject to changes for DT representation of clocks - and resets - The TI Davinci family of SoCs usually contains a TI DSP Core sub-system that is used to offload some of the processor-intensive tasks or algorithms, for achieving various system level goals. diff --git a/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml b/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml index 65cb2e5c5eee..eb2992a447d7 100644 --- a/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml +++ b/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Universal Synchronous Asynchronous Receiver/Transmitter (USART) maintainers: - - Richard Genoud + - Richard Genoud properties: compatible: diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml index 397f75909b20..ce1a6505eb51 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml @@ -51,7 +51,7 @@ properties: ranges: true patternProperties: - "^clock-controller@[0-9a-z]+$": + "^clock-controller@[0-9a-f]+$": $ref: /schemas/clock/fsl,flexspi-clock.yaml# required: diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml index 8d088b5fe823..a6a511b00a12 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml @@ -41,7 +41,7 @@ properties: ranges: true patternProperties: - "^interrupt-controller@[a-z0-9]+$": + "^interrupt-controller@[a-f0-9]+$": $ref: /schemas/interrupt-controller/fsl,ls-extirq.yaml# required: diff --git a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml index 7a4a6ab85970..ab8f28993139 100644 --- a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml +++ b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml @@ -60,7 +60,7 @@ properties: be implemented in an always-on power domain." patternProperties: - '^frame@[0-9a-z]*$': + '^frame@[0-9a-f]+$': type: object additionalProperties: false description: A timer node has up to 8 frame sub-nodes, each with the following properties. diff --git a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml index 10c146424baa..cd3680dc002f 100644 --- a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml @@ -27,10 +27,13 @@ properties: - qcom,msm8996-ufshc - qcom,msm8998-ufshc - qcom,sa8775p-ufshc + - qcom,sc7180-ufshc - qcom,sc7280-ufshc + - qcom,sc8180x-ufshc - qcom,sc8280xp-ufshc - qcom,sdm845-ufshc - qcom,sm6115-ufshc + - qcom,sm6125-ufshc - qcom,sm6350-ufshc - qcom,sm8150-ufshc - qcom,sm8250-ufshc @@ -42,11 +45,11 @@ properties: - const: jedec,ufs-2.0 clocks: - minItems: 8 + minItems: 7 maxItems: 11 clock-names: - minItems: 8 + minItems: 7 maxItems: 11 dma-coherent: true @@ -112,6 +115,31 @@ required: allOf: - $ref: ufs-common.yaml + - if: + properties: + compatible: + contains: + enum: + - qcom,sc7180-ufshc + then: + properties: + clocks: + minItems: 7 + maxItems: 7 + clock-names: + items: + - const: core_clk + - const: bus_aggr_clk + - const: iface_clk + - const: core_clk_unipro + - const: ref_clk + - const: tx_lane0_sync_clk + - const: rx_lane0_sync_clk + reg: + maxItems: 1 + reg-names: + maxItems: 1 + - if: properties: compatible: @@ -120,6 +148,7 @@ allOf: - qcom,msm8998-ufshc - qcom,sa8775p-ufshc - qcom,sc7280-ufshc + - qcom,sc8180x-ufshc - qcom,sc8280xp-ufshc - qcom,sm8250-ufshc - qcom,sm8350-ufshc @@ -215,6 +244,7 @@ allOf: contains: enum: - qcom,sm6115-ufshc + - qcom,sm6125-ufshc then: properties: clocks: @@ -248,7 +278,7 @@ allOf: reg: maxItems: 1 clocks: - minItems: 8 + minItems: 7 maxItems: 8 else: properties: @@ -256,7 +286,7 @@ allOf: minItems: 1 maxItems: 2 clocks: - minItems: 8 + minItems: 7 maxItems: 11 unevaluatedProperties: false diff --git a/Documentation/driver-api/virtio/writing_virtio_drivers.rst b/Documentation/driver-api/virtio/writing_virtio_drivers.rst index e14c58796d25..e5de6f5d061a 100644 --- a/Documentation/driver-api/virtio/writing_virtio_drivers.rst +++ b/Documentation/driver-api/virtio/writing_virtio_drivers.rst @@ -97,7 +97,6 @@ like this:: static struct virtio_driver virtio_dummy_driver = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtio_dummy_probe, .remove = virtio_dummy_remove, diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst new file mode 100644 index 000000000000..e2bd61ccd96f --- /dev/null +++ b/Documentation/filesystems/bcachefs/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================== +bcachefs Documentation +====================== + +.. toctree:: + :maxdepth: 2 + :numbered: + + errorcodes diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 0ea1e44fa028..1f9b4c905a6a 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -69,6 +69,7 @@ Documentation for filesystem implementations. afs autofs autofs-mount-control + bcachefs/index befs bfs btrfs diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index b1d97fafddcf..bb5c44f8bd1c 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -178,7 +178,7 @@ yet. Bug reports are always welcome at the issue tracker below! - ``LLVM=1`` * - s390 - Maintained - - ``CC=clang`` + - ``LLVM=1`` (LLVM >= 18.1.0), ``CC=clang`` (LLVM < 18.1.0) * - um (User Mode) - Maintained - ``LLVM=1`` diff --git a/Documentation/mm/page_owner.rst b/Documentation/mm/page_owner.rst index 0d0334cd5179..3a45a20fc05a 100644 --- a/Documentation/mm/page_owner.rst +++ b/Documentation/mm/page_owner.rst @@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of each page. It is already implemented and activated if page owner is enabled. Other usages are more than welcome. -It can also be used to show all the stacks and their outstanding -allocations, which gives us a quick overview of where the memory is going -without the need to screen through all the pages and match the allocation -and free operation. +It can also be used to show all the stacks and their current number of +allocated base pages, which gives us a quick overview of where the memory +is going without the need to screen through all the pages and match the +allocation and free operation. page owner is disabled by default. So, if you'd like to use it, you need to add "page_owner=on" to your boot cmdline. If the kernel is built @@ -75,42 +75,45 @@ Usage cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt cat stacks.txt - prep_new_page+0xa9/0x120 - get_page_from_freelist+0x7e6/0x2140 - __alloc_pages+0x18a/0x370 - new_slab+0xc8/0x580 - ___slab_alloc+0x1f2/0xaf0 - __slab_alloc.isra.86+0x22/0x40 - kmem_cache_alloc+0x31b/0x350 - __khugepaged_enter+0x39/0x100 - dup_mmap+0x1c7/0x5ce - copy_process+0x1afe/0x1c90 - kernel_clone+0x9a/0x3c0 - __do_sys_clone+0x66/0x90 - do_syscall_64+0x7f/0x160 - entry_SYSCALL_64_after_hwframe+0x6c/0x74 - stack_count: 234 + post_alloc_hook+0x177/0x1a0 + get_page_from_freelist+0xd01/0xd80 + __alloc_pages+0x39e/0x7e0 + allocate_slab+0xbc/0x3f0 + ___slab_alloc+0x528/0x8a0 + kmem_cache_alloc+0x224/0x3b0 + sk_prot_alloc+0x58/0x1a0 + sk_alloc+0x32/0x4f0 + inet_create+0x427/0xb50 + __sock_create+0x2e4/0x650 + inet_ctl_sock_create+0x30/0x180 + igmp_net_init+0xc1/0x130 + ops_init+0x167/0x410 + setup_net+0x304/0xa60 + copy_net_ns+0x29b/0x4a0 + create_new_namespaces+0x4a1/0x820 + nr_base_pages: 16 ... ... echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt cat stacks_7000.txt - prep_new_page+0xa9/0x120 - get_page_from_freelist+0x7e6/0x2140 - __alloc_pages+0x18a/0x370 - alloc_pages_mpol+0xdf/0x1e0 - folio_alloc+0x14/0x50 - filemap_alloc_folio+0xb0/0x100 - page_cache_ra_unbounded+0x97/0x180 - filemap_fault+0x4b4/0x1200 - __do_fault+0x2d/0x110 - do_pte_missing+0x4b0/0xa30 - __handle_mm_fault+0x7fa/0xb70 - handle_mm_fault+0x125/0x300 - do_user_addr_fault+0x3c9/0x840 - exc_page_fault+0x68/0x150 - asm_exc_page_fault+0x22/0x30 - stack_count: 8248 + post_alloc_hook+0x177/0x1a0 + get_page_from_freelist+0xd01/0xd80 + __alloc_pages+0x39e/0x7e0 + alloc_pages_mpol+0x22e/0x490 + folio_alloc+0xd5/0x110 + filemap_alloc_folio+0x78/0x230 + page_cache_ra_order+0x287/0x6f0 + filemap_get_pages+0x517/0x1160 + filemap_read+0x304/0x9f0 + xfs_file_buffered_read+0xe6/0x1d0 [xfs] + xfs_file_read_iter+0x1f0/0x380 [xfs] + __kernel_read+0x3b9/0x730 + kernel_read_file+0x309/0x4d0 + __do_sys_finit_module+0x381/0x730 + do_syscall_64+0x8d/0x150 + entry_SYSCALL_64_after_hwframe+0x62/0x6a + nr_base_pages: 20824 ... cat /sys/kernel/debug/page_owner > page_owner_full.txt diff --git a/Documentation/networking/devlink/devlink-eswitch-attr.rst b/Documentation/networking/devlink/devlink-eswitch-attr.rst new file mode 100644 index 000000000000..08bb39ab1528 --- /dev/null +++ b/Documentation/networking/devlink/devlink-eswitch-attr.rst @@ -0,0 +1,76 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================== +Devlink E-Switch Attribute +========================== + +Devlink E-Switch supports two modes of operation: legacy and switchdev. +Legacy mode operates based on traditional MAC/VLAN steering rules. Switching +decisions are made based on MAC addresses, VLANs, etc. There is limited ability +to offload switching rules to hardware. + +On the other hand, switchdev mode allows for more advanced offloading +capabilities of the E-Switch to hardware. In switchdev mode, more switching +rules and logic can be offloaded to the hardware switch ASIC. It enables +representor netdevices that represent the slow path of virtual functions (VFs) +or scalable-functions (SFs) of the device. See more information about +:ref:`Documentation/networking/switchdev.rst ` and +:ref:`Documentation/networking/representors.rst `. + +In addition, the devlink E-Switch also comes with other attributes listed +in the following section. + +Attributes Description +====================== + +The following is a list of E-Switch attributes. + +.. list-table:: E-Switch attributes + :widths: 8 5 45 + + * - Name + - Type + - Description + * - ``mode`` + - enum + - The mode of the device. The mode can be one of the following: + + * ``legacy`` operates based on traditional MAC/VLAN steering + rules. + * ``switchdev`` allows for more advanced offloading capabilities of + the E-Switch to hardware. + * - ``inline-mode`` + - enum + - Some HWs need the VF driver to put part of the packet + headers on the TX descriptor so the e-switch can do proper + matching and steering. Support for both switchdev mode and legacy mode. + + * ``none`` none. + * ``link`` L2 mode. + * ``network`` L3 mode. + * ``transport`` L4 mode. + * - ``encap-mode`` + - enum + - The encapsulation mode of the device. Support for both switchdev mode + and legacy mode. The mode can be one of the following: + + * ``none`` Disable encapsulation support. + * ``basic`` Enable encapsulation support. + +Example Usage +============= + +.. code:: shell + + # enable switchdev mode + $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev + + # set inline-mode and encap-mode + $ devlink dev eswitch set pci/0000:08:00.0 inline-mode none encap-mode basic + + # display devlink device eswitch attributes + $ devlink dev eswitch show pci/0000:08:00.0 + pci/0000:08:00.0: mode switchdev inline-mode none encap-mode basic + + # enable encap-mode with legacy mode + $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index e14d7a701b72..948c8c44e233 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -67,6 +67,7 @@ general. devlink-selftests devlink-trap devlink-linecard + devlink-eswitch-attr Driver-specific documentation ----------------------------- diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst index decb39c19b9e..5e23386f6968 100644 --- a/Documentation/networking/representors.rst +++ b/Documentation/networking/representors.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. _representors: ============================= Network Function Representors diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index bb2100228cc7..6e9a4597bf2c 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -252,7 +252,7 @@ an involved disclosed party. The current ambassadors list: AMD Tom Lendacky Ampere Darren Hart ARM Catalin Marinas - IBM Power Anton Blanchard + IBM Power Michael Ellerman IBM Z Christian Borntraeger Intel Tony Luck Qualcomm Trilok Soni diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 995780088eb2..84335d119ff1 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -46,21 +46,16 @@ SEV hardware uses ASIDs to associate a memory encryption key with a VM. Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value defined in the CPUID 0x8000001f[ecx] field. -SEV Key Management -================== +The KVM_MEMORY_ENCRYPT_OP ioctl +=============================== -The SEV guest key management is handled by a separate processor called the AMD -Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure -key management interface to perform common hypervisor activities such as -encrypting bootstrap code, snapshot, migrating and debugging the guest. For more -information, see the SEV Key Management spec [api-spec]_ - -The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP. If the argument -to KVM_MEMORY_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled -and ``ENOTTY`` if it is disabled (on some older versions of Linux, -the ioctl runs normally even with a NULL argument, and therefore will -likely return ``EFAULT``). If non-NULL, the argument to KVM_MEMORY_ENCRYPT_OP -must be a struct kvm_sev_cmd:: +The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP, which operates on +the VM file descriptor. If the argument to KVM_MEMORY_ENCRYPT_OP is NULL, +the ioctl returns 0 if SEV is enabled and ``ENOTTY`` if it is disabled +(on some older versions of Linux, the ioctl tries to run normally even +with a NULL argument, and therefore will likely return ``EFAULT`` instead +of zero if SEV is enabled). If non-NULL, the argument to +KVM_MEMORY_ENCRYPT_OP must be a struct kvm_sev_cmd:: struct kvm_sev_cmd { __u32 id; @@ -87,10 +82,6 @@ guests, such as launching, running, snapshotting, migrating and decommissioning. The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform context. In a typical workflow, this command should be the first command issued. -The firmware can be initialized either by using its own non-volatile storage or -the OS can manage the NV storage for the firmware using the module parameter -``init_ex_path``. If the file specified by ``init_ex_path`` does not exist or -is invalid, the OS will create or override the file with output from PSP. Returns: 0 on success, -negative on error @@ -434,6 +425,21 @@ issued by the hypervisor to make the guest ready for execution. Returns: 0 on success, -negative on error +Firmware Management +=================== + +The SEV guest key management is handled by a separate processor called the AMD +Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure +key management interface to perform common hypervisor activities such as +encrypting bootstrap code, snapshot, migrating and debugging the guest. For more +information, see the SEV Key Management spec [api-spec]_ + +The AMD-SP firmware can be initialized either by using its own non-volatile +storage or the OS can manage the NV storage for the firmware using +parameter ``init_ex_path`` of the ``ccp`` module. If the file specified +by ``init_ex_path`` does not exist or is invalid, the OS will create or +override the file with PSP non-volatile storage. + References ========== diff --git a/Documentation/virt/kvm/x86/msr.rst b/Documentation/virt/kvm/x86/msr.rst index 9315fc385fb0..3aecf2a70e7b 100644 --- a/Documentation/virt/kvm/x86/msr.rst +++ b/Documentation/virt/kvm/x86/msr.rst @@ -193,8 +193,8 @@ data: Asynchronous page fault (APF) control MSR. Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area - which must be in guest RAM and must be zeroed. This memory is expected - to hold a copy of the following structure:: + which must be in guest RAM. This memory is expected to hold the + following structure:: struct kvm_vcpu_pv_apf_data { /* Used for 'page not present' events delivered via #PF */ @@ -204,7 +204,6 @@ data: __u32 token; __u8 pad[56]; - __u32 enabled; }; Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1 @@ -232,14 +231,14 @@ data: as regular page fault, guest must reset 'flags' to '0' before it does something that can generate normal page fault. - Bytes 5-7 of 64 byte memory location ('token') will be written to by the + Bytes 4-7 of 64 byte memory location ('token') will be written to by the hypervisor at the time of APF 'page ready' event injection. The content - of these bytes is a token which was previously delivered as 'page not - present' event. The event indicates the page in now available. Guest is - supposed to write '0' to 'token' when it is done handling 'page ready' - event and to write 1' to MSR_KVM_ASYNC_PF_ACK after clearing the location; - writing to the MSR forces KVM to re-scan its queue and deliver the next - pending notification. + of these bytes is a token which was previously delivered in CR2 as + 'page not present' event. The event indicates the page is now available. + Guest is supposed to write '0' to 'token' when it is done handling + 'page ready' event and to write '1' to MSR_KVM_ASYNC_PF_ACK after + clearing the location; writing to the MSR forces KVM to re-scan its + queue and deliver the next pending notification. Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page ready' APF delivery needs to be written to before enabling APF mechanism diff --git a/MAINTAINERS b/MAINTAINERS index aa3b947fb080..ebf03f5f0619 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2191,7 +2191,6 @@ N: mxs ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE M: Shawn Guo -M: Li Yang L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git @@ -2708,7 +2707,7 @@ F: sound/soc/rockchip/ N: rockchip ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski R: Alim Akhtar L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org @@ -3573,6 +3572,7 @@ S: Supported C: irc://irc.oftc.net/bcache T: git https://evilpiepirate.org/git/bcachefs.git F: fs/bcachefs/ +F: Documentation/filesystems/bcachefs/ BDISP ST MEDIA DRIVER M: Fabien Dessenne @@ -3942,8 +3942,7 @@ F: kernel/bpf/ringbuf.c BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF) M: KP Singh -R: Florent Revest -R: Brendan Jackman +R: Matt Bobrowski L: bpf@vger.kernel.org S: Maintained F: Documentation/bpf/prog_lsm.rst @@ -3968,7 +3967,7 @@ F: kernel/bpf/bpf_lru* F: kernel/bpf/cgroup.c BPF [TOOLING] (bpftool) -M: Quentin Monnet +M: Quentin Monnet L: bpf@vger.kernel.org S: Maintained F: kernel/bpf/disasm.* @@ -4870,7 +4869,6 @@ F: drivers/power/supply/cw2015_battery.c CEPH COMMON CODE (LIBCEPH) M: Ilya Dryomov M: Xiubo Li -R: Jeff Layton L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ @@ -4882,7 +4880,6 @@ F: net/ceph/ CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH) M: Xiubo Li M: Ilya Dryomov -R: Jeff Layton L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ @@ -5558,7 +5555,7 @@ F: drivers/cpuidle/cpuidle-big_little.c CPUIDLE DRIVER - ARM EXYNOS M: Daniel Lezcano M: Kukjin Kim -R: Krzysztof Kozlowski +R: Krzysztof Kozlowski L: linux-pm@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@ -6157,7 +6154,6 @@ DEVICE-MAPPER (LVM) M: Alasdair Kergon M: Mike Snitzer M: Mikulas Patocka -M: dm-devel@lists.linux.dev L: dm-devel@lists.linux.dev S: Maintained Q: http://patchwork.kernel.org/project/dm-devel/list/ @@ -6173,7 +6169,6 @@ F: include/uapi/linux/dm-*.h DEVICE-MAPPER VDO TARGET M: Matthew Sakai -M: dm-devel@lists.linux.dev L: dm-devel@lists.linux.dev S: Maintained F: Documentation/admin-guide/device-mapper/vdo*.rst @@ -7941,6 +7936,7 @@ M: Gao Xiang M: Chao Yu R: Yue Hu R: Jeffle Xu +R: Sandeep Dhavale L: linux-erofs@lists.ozlabs.org S: Maintained W: https://erofs.docs.kernel.org @@ -8525,7 +8521,6 @@ S: Maintained F: drivers/video/fbdev/fsl-diu-fb.* FREESCALE DMA DRIVER -M: Li Yang M: Zhang Wei L: linuxppc-dev@lists.ozlabs.org S: Maintained @@ -8690,10 +8685,9 @@ F: drivers/soc/fsl/qe/tsa.h F: include/dt-bindings/soc/cpm1-fsl,tsa.h FREESCALE QUICC ENGINE UCC ETHERNET DRIVER -M: Li Yang L: netdev@vger.kernel.org L: linuxppc-dev@lists.ozlabs.org -S: Maintained +S: Orphan F: drivers/net/ethernet/freescale/ucc_geth* FREESCALE QUICC ENGINE UCC HDLC DRIVER @@ -8710,10 +8704,9 @@ S: Maintained F: drivers/tty/serial/ucc_uart.c FREESCALE SOC DRIVERS -M: Li Yang L: linuxppc-dev@lists.ozlabs.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained +S: Orphan F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml F: Documentation/devicetree/bindings/soc/fsl/ F: drivers/soc/fsl/ @@ -8747,17 +8740,15 @@ F: Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml F: sound/soc/fsl/fsl_qmc_audio.c FREESCALE USB PERIPHERAL DRIVERS -M: Li Yang L: linux-usb@vger.kernel.org L: linuxppc-dev@lists.ozlabs.org -S: Maintained +S: Orphan F: drivers/usb/gadget/udc/fsl* FREESCALE USB PHY DRIVER -M: Ran Wang L: linux-usb@vger.kernel.org L: linuxppc-dev@lists.ozlabs.org -S: Maintained +S: Orphan F: drivers/usb/phy/phy-fsl-usb* FREEVXFS FILESYSTEM @@ -9002,7 +8993,7 @@ F: drivers/i2c/muxes/i2c-mux-gpio.c F: include/linux/platform_data/i2c-mux-gpio.h GENERIC GPIO RESET DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski S: Maintained F: drivers/reset/reset-gpio.c @@ -9653,7 +9644,9 @@ L: linux-input@vger.kernel.org S: Maintained F: drivers/hid/hid-logitech-hidpp.c -HIGH-RESOLUTION TIMERS, CLOCKEVENTS +HIGH-RESOLUTION TIMERS, TIMER WHEEL, CLOCKEVENTS +M: Anna-Maria Behnsen +M: Frederic Weisbecker M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained @@ -9661,9 +9654,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core F: Documentation/timers/ F: include/linux/clockchips.h F: include/linux/hrtimer.h +F: include/linux/timer.h F: kernel/time/clockevents.c F: kernel/time/hrtimer.c -F: kernel/time/timer_*.c +F: kernel/time/timer.c +F: kernel/time/timer_list.c +F: kernel/time/timer_migration.* +F: tools/testing/selftests/timers/ HIGH-SPEED SCC DRIVER FOR AX.25 L: linux-hams@vger.kernel.org @@ -10026,7 +10023,7 @@ F: drivers/media/platform/st/sti/hva HWPOISON MEMORY FAILURE HANDLING M: Miaohe Lin -R: Naoya Horiguchi +R: Naoya Horiguchi L: linux-mm@kvack.org S: Maintained F: mm/hwpoison-inject.c @@ -11997,7 +11994,7 @@ F: include/keys/encrypted-type.h F: security/keys/encrypted-keys/ KEYS-TRUSTED -M: James Bottomley +M: James Bottomley M: Jarkko Sakkinen M: Mimi Zohar L: linux-integrity@vger.kernel.org @@ -13134,6 +13131,7 @@ F: drivers/net/ethernet/marvell/mvpp2/ MARVELL MWIFIEX WIRELESS DRIVER M: Brian Norris +R: Francesco Dolcini L: linux-wireless@vger.kernel.org S: Odd Fixes F: drivers/net/wireless/marvell/mwifiex/ @@ -13290,7 +13288,7 @@ F: drivers/iio/adc/max11205.c MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS R: Iskren Chernev -R: Krzysztof Kozlowski +R: Krzysztof Kozlowski R: Marek Szyprowski R: Matheus Castello L: linux-pm@vger.kernel.org @@ -13300,7 +13298,7 @@ F: drivers/power/supply/max17040_battery.c MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS R: Hans de Goede -R: Krzysztof Kozlowski +R: Krzysztof Kozlowski R: Marek Szyprowski R: Sebastian Krzyszkowiak R: Purism Kernel Team @@ -13358,7 +13356,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml F: drivers/power/supply/max77976_charger.c MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-pm@vger.kernel.org S: Maintained B: mailto:linux-samsung-soc@vger.kernel.org @@ -13369,7 +13367,7 @@ F: drivers/power/supply/max77693_charger.c MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS M: Chanwoo Choi -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-kernel@vger.kernel.org S: Maintained B: mailto:linux-samsung-soc@vger.kernel.org @@ -14014,6 +14012,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_* MELLANOX ETHERNET DRIVER (mlx5e) M: Saeed Mahameed +M: Tariq Toukan L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -14081,6 +14080,7 @@ F: include/uapi/rdma/mlx4-abi.h MELLANOX MLX5 core VPI driver M: Saeed Mahameed M: Leon Romanovsky +M: Tariq Toukan L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Supported @@ -14151,7 +14151,7 @@ F: mm/mm_init.c F: tools/testing/memblock/ MEMORY CONTROLLER DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-kernel@vger.kernel.org S: Maintained B: mailto:krzysztof.kozlowski@linaro.org @@ -14356,7 +14356,7 @@ F: drivers/dma/at_xdmac.c F: include/dt-bindings/dma/at91.h MICROCHIP AT91 SERIAL DRIVER -M: Richard Genoud +M: Richard Genoud S: Maintained F: Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml F: drivers/tty/serial/atmel_serial.c @@ -15532,7 +15532,7 @@ F: include/uapi/linux/nexthop.h F: net/ipv4/nexthop.c NFC SUBSYSTEM -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/nfc/ @@ -15627,9 +15627,10 @@ F: drivers/misc/nsm.c F: include/uapi/linux/nsm.h NOHZ, DYNTICKS SUPPORT +M: Anna-Maria Behnsen M: Frederic Weisbecker -M: Thomas Gleixner M: Ingo Molnar +M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/nohz @@ -15908,7 +15909,7 @@ F: Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml F: drivers/regulator/pf8x00-regulator.c NXP PTN5150A CC LOGIC AND EXTCON DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml @@ -16519,7 +16520,7 @@ K: of_overlay_remove OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS M: Rob Herring -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Conor Dooley L: devicetree@vger.kernel.org S: Maintained @@ -16725,9 +16726,9 @@ F: include/uapi/linux/ppdev.h PARAVIRT_OPS INTERFACE M: Juergen Gross -R: Ajay Kaher -R: Alexey Makhalov -R: VMware PV-Drivers Reviewers +R: Ajay Kaher +R: Alexey Makhalov +R: Broadcom internal kernel review list L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported @@ -16966,7 +16967,6 @@ F: drivers/pci/controller/dwc/pci-exynos.c PCI DRIVER FOR SYNOPSYS DESIGNWARE M: Jingoo Han -M: Gustavo Pimentel M: Manivannan Sadhasivam L: linux-pci@vger.kernel.org S: Maintained @@ -17477,7 +17477,7 @@ F: Documentation/devicetree/bindings/pinctrl/renesas,* F: drivers/pinctrl/renesas/ PIN CONTROLLER - SAMSUNG -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Sylwester Nawrocki R: Alim Akhtar L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -17590,15 +17590,20 @@ F: drivers/pnp/ F: include/linux/pnp.h POSIX CLOCKS and TIMERS +M: Anna-Maria Behnsen +M: Frederic Weisbecker M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core F: fs/timerfd.c F: include/linux/time_namespace.h -F: include/linux/timer* +F: include/linux/timerfd.h +F: include/uapi/linux/time.h +F: include/uapi/linux/timerfd.h F: include/trace/events/timer* -F: kernel/time/*timer* +F: kernel/time/itimer.c +F: kernel/time/posix-* F: kernel/time/namespace.c POWER MANAGEMENT CORE @@ -18645,18 +18650,21 @@ REALTEK WIRELESS DRIVER (rtlwifi family) M: Ping-Ke Shih L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtlwifi/ REALTEK WIRELESS DRIVER (rtw88) M: Ping-Ke Shih L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtw88/ REALTEK WIRELESS DRIVER (rtw89) M: Ping-Ke Shih L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtw89/ REDPINE WIRELESS DRIVER @@ -18727,13 +18735,24 @@ S: Supported F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c -RENESAS ETHERNET DRIVERS +RENESAS ETHERNET AVB DRIVER R: Sergey Shtylyov L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org -F: Documentation/devicetree/bindings/net/renesas,*.yaml -F: drivers/net/ethernet/renesas/ -F: include/linux/sh_eth.h +F: Documentation/devicetree/bindings/net/renesas,etheravb.yaml +F: drivers/net/ethernet/renesas/Kconfig +F: drivers/net/ethernet/renesas/Makefile +F: drivers/net/ethernet/renesas/ravb* + +RENESAS ETHERNET SWITCH DRIVER +R: Yoshihiro Shimoda +L: netdev@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +F: Documentation/devicetree/bindings/net/renesas,*ether-switch.yaml +F: drivers/net/ethernet/renesas/Kconfig +F: drivers/net/ethernet/renesas/Makefile +F: drivers/net/ethernet/renesas/rcar_gen4* +F: drivers/net/ethernet/renesas/rswitch* RENESAS IDT821034 ASoC CODEC M: Herve Codina @@ -18843,6 +18862,16 @@ S: Supported F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml F: drivers/i2c/busses/i2c-rzv2m.c +RENESAS SUPERH ETHERNET DRIVER +R: Sergey Shtylyov +L: netdev@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +F: Documentation/devicetree/bindings/net/renesas,ether.yaml +F: drivers/net/ethernet/renesas/Kconfig +F: drivers/net/ethernet/renesas/Makefile +F: drivers/net/ethernet/renesas/sh_eth* +F: include/linux/sh_eth.h + RENESAS USB PHY DRIVER M: Yoshihiro Shimoda L: linux-renesas-soc@vger.kernel.org @@ -19179,12 +19208,14 @@ M: Hin-Tak Leung M: Larry Finger L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtl818x/rtl8187/ RTL8XXXU WIRELESS DRIVER (rtl8xxxu) M: Jes Sorensen L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtl8xxxu/ RTRS TRANSPORT DRIVERS @@ -19414,7 +19445,7 @@ F: Documentation/devicetree/bindings/sound/samsung* F: sound/soc/samsung/ SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@ -19449,7 +19480,7 @@ S: Maintained F: drivers/platform/x86/samsung-laptop.c SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-kernel@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@ -19475,7 +19506,7 @@ F: drivers/media/platform/samsung/s3c-camif/ F: include/media/drv-intf/s3c_camif.h SAMSUNG S3FWRN5 NFC DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski S: Maintained F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml F: drivers/nfc/s3fwrn5 @@ -19496,7 +19527,7 @@ S: Supported F: drivers/media/i2c/s5k5baf.c SAMSUNG S5P Security SubSystem (SSS) DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Vladimir Zapolskiy L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@ -19518,7 +19549,7 @@ F: Documentation/devicetree/bindings/media/samsung,fimc.yaml F: drivers/media/platform/samsung/exynos4-is/ SAMSUNG SOC CLOCK DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Sylwester Nawrocki M: Chanwoo Choi R: Alim Akhtar @@ -19550,7 +19581,7 @@ F: drivers/net/ethernet/samsung/sxgbe/ SAMSUNG THERMAL DRIVER M: Bartlomiej Zolnierkiewicz -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-pm@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@ -19637,7 +19668,7 @@ F: drivers/scsi/sg.c F: include/scsi/sg.h SCSI SUBSYSTEM -M: "James E.J. Bottomley" +M: "James E.J. Bottomley" M: "Martin K. Petersen" L: linux-scsi@vger.kernel.org S: Maintained @@ -22254,13 +22285,20 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core F: include/linux/clocksource.h F: include/linux/time.h +F: include/linux/timekeeper_internal.h +F: include/linux/timekeeping.h F: include/linux/timex.h F: include/uapi/linux/time.h F: include/uapi/linux/timex.h F: kernel/time/alarmtimer.c -F: kernel/time/clocksource.c -F: kernel/time/ntp.c -F: kernel/time/time*.c +F: kernel/time/clocksource* +F: kernel/time/ntp* +F: kernel/time/time.c +F: kernel/time/timeconst.bc +F: kernel/time/timeconv.c +F: kernel/time/timecounter.c +F: kernel/time/timekeeping* +F: kernel/time/time_test.c F: tools/testing/selftests/timers/ TIPC NETWORK LAYER @@ -22384,6 +22422,7 @@ S: Maintained W: https://kernsec.org/wiki/index.php/Linux_Kernel_Integrity Q: https://patchwork.kernel.org/project/linux-integrity/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git +F: Documentation/devicetree/bindings/tpm/ F: drivers/char/tpm/ TPS546D24 DRIVER @@ -22530,6 +22569,7 @@ Q: https://patchwork.kernel.org/project/linux-pm/list/ B: https://bugzilla.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat F: tools/power/x86/turbostat/ +F: tools/testing/selftests/turbostat/ TW5864 VIDEO4LINUX DRIVER M: Bluecherry Maintainers @@ -23608,9 +23648,9 @@ S: Supported F: drivers/misc/vmw_balloon.c VMWARE HYPERVISOR INTERFACE -M: Ajay Kaher -M: Alexey Makhalov -R: VMware PV-Drivers Reviewers +M: Ajay Kaher +M: Alexey Makhalov +R: Broadcom internal kernel review list L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported @@ -23619,34 +23659,34 @@ F: arch/x86/include/asm/vmware.h F: arch/x86/kernel/cpu/vmware.c VMWARE PVRDMA DRIVER -M: Bryan Tan -M: Vishnu Dasa -R: VMware PV-Drivers Reviewers +M: Bryan Tan +M: Vishnu Dasa +R: Broadcom internal kernel review list L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/vmw_pvrdma/ VMWARE PVSCSI DRIVER -M: Vishal Bhakta -R: VMware PV-Drivers Reviewers +M: Vishal Bhakta +R: Broadcom internal kernel review list L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/vmw_pvscsi.c F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER -M: Jeff Sipek -R: Ajay Kaher -R: Alexey Makhalov -R: VMware PV-Drivers Reviewers +M: Nick Shi +R: Ajay Kaher +R: Alexey Makhalov +R: Broadcom internal kernel review list L: netdev@vger.kernel.org S: Supported F: drivers/ptp/ptp_vmw.c VMWARE VMCI DRIVER -M: Bryan Tan -M: Vishnu Dasa -R: VMware PV-Drivers Reviewers +M: Bryan Tan +M: Vishnu Dasa +R: Broadcom internal kernel review list L: linux-kernel@vger.kernel.org S: Supported F: drivers/misc/vmw_vmci/ @@ -23661,16 +23701,16 @@ F: drivers/input/mouse/vmmouse.c F: drivers/input/mouse/vmmouse.h VMWARE VMXNET3 ETHERNET DRIVER -M: Ronak Doshi -R: VMware PV-Drivers Reviewers +M: Ronak Doshi +R: Broadcom internal kernel review list L: netdev@vger.kernel.org S: Supported F: drivers/net/vmxnet3/ VMWARE VSOCK VMCI TRANSPORT DRIVER -M: Bryan Tan -M: Vishnu Dasa -R: VMware PV-Drivers Reviewers +M: Bryan Tan +M: Vishnu Dasa +R: Broadcom internal kernel review list L: linux-kernel@vger.kernel.org S: Supported F: net/vmw_vsock/vmci_transport* @@ -23738,7 +23778,7 @@ S: Orphan F: drivers/mmc/host/vub300.c W1 DALLAS'S 1-WIRE BUS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski S: Maintained F: Documentation/devicetree/bindings/w1/ F: Documentation/w1/ diff --git a/Makefile b/Makefile index 763b6792d3d5..43b10f3d438c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 9f066785bb71..65afb1de48b3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1172,12 +1172,12 @@ config PAGE_SIZE_LESS_THAN_256KB config PAGE_SHIFT int - default 12 if PAGE_SIZE_4KB - default 13 if PAGE_SIZE_8KB - default 14 if PAGE_SIZE_16KB - default 15 if PAGE_SIZE_32KB - default 16 if PAGE_SIZE_64KB - default 18 if PAGE_SIZE_256KB + default 12 if PAGE_SIZE_4KB + default 13 if PAGE_SIZE_8KB + default 14 if PAGE_SIZE_16KB + default 15 if PAGE_SIZE_32KB + default 16 if PAGE_SIZE_64KB + default 18 if PAGE_SIZE_256KB # This allows to use a set of generic functions to determine mmap base # address by giving priority to top-down scheme only if the process diff --git a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi index 1235a71c6abe..52869e68f833 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi @@ -666,7 +666,7 @@ bus-width = <4>; no-1-8-v; no-sdio; - no-emmc; + no-mmc; status = "okay"; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts index ba7231b364bb..7bab113ca6da 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts @@ -210,6 +210,7 @@ remote-endpoint = <&mipi_from_sensor>; clock-lanes = <0>; data-lanes = <1>; + link-frequencies = /bits/ 64 <330000000>; }; }; }; diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h new file mode 100644 index 000000000000..2189e507c8e0 --- /dev/null +++ b/arch/arm/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include +#include + +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return cpu_architecture() >= CPU_ARCH_ARMv6; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 31755a378c73..ff2a4a4d8220 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = { static struct gpiod_lookup_table tusb_gpio_table = { .dev_id = "musb-tusb", .table = { - GPIO_LOOKUP("gpio-0-15", 0, "enable", - GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-48-63", 10, "int", - GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH), { } }, }; @@ -140,12 +138,11 @@ static int slot1_cover_open; static int slot2_cover_open; static struct device *mmc_device; -static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = { +static struct gpiod_lookup_table nokia800_mmc_gpio_table = { .dev_id = "mmci-omap.0", .table = { /* Slot switch, GPIO 96 */ - GPIO_LOOKUP("gpio-80-111", 16, - "switch", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH), { } }, }; @@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = { static struct gpiod_lookup_table nokia810_mmc_gpio_table = { .dev_id = "mmci-omap.0", .table = { + /* Slot switch, GPIO 96 */ + GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH), /* Slot index 1, VSD power, GPIO 23 */ - GPIO_LOOKUP_IDX("gpio-16-31", 7, - "vsd", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH), /* Slot index 1, VIO power, GPIO 9 */ - GPIO_LOOKUP_IDX("gpio-0-15", 9, - "vio", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH), { } }, }; @@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC]; static void __init n8x0_mmc_init(void) { - gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table); - if (board_is_n810()) { mmc1_data.slots[0].name = "external"; @@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void) mmc1_data.slots[1].name = "internal"; mmc1_data.slots[1].ban_openended = 1; gpiod_add_lookup_table(&nokia810_mmc_gpio_table); + } else { + gpiod_add_lookup_table(&nokia800_mmc_gpio_table); } mmc1_data.nr_slots = 2; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 3c42240e78e2..4aaf5a0c1ed8 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 { interrupts = ; fsl,usbphy = <&usbphy1>; fsl,usbmisc = <&usbmisc1 0>; - clocks = <&usb2_lpcg 0>; + clocks = <&usb2_lpcg IMX_LPCG_CLK_6>; ahb-burst-config = <0x0>; tx-burst-size-dword = <0x10>; rx-burst-size-dword = <0x10>; @@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 { usbphy1: usbphy@5b100000 { compatible = "fsl,imx7ulp-usbphy"; reg = <0x5b100000 0x1000>; - clocks = <&usb2_lpcg 1>; + clocks = <&usb2_lpcg IMX_LPCG_CLK_7>; power-domains = <&pd IMX_SC_R_USB_0_PHY>; status = "disabled"; }; @@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b010000 0x10000>; clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, - <&sdhc0_lpcg IMX_LPCG_CLK_0>, - <&sdhc0_lpcg IMX_LPCG_CLK_5>; + <&sdhc0_lpcg IMX_LPCG_CLK_5>, + <&sdhc0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_0>; status = "disabled"; @@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b020000 0x10000>; clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, - <&sdhc1_lpcg IMX_LPCG_CLK_0>, - <&sdhc1_lpcg IMX_LPCG_CLK_5>; + <&sdhc1_lpcg IMX_LPCG_CLK_5>, + <&sdhc1_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_1>; fsl,tuning-start-tap = <20>; @@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b030000 0x10000>; clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, - <&sdhc2_lpcg IMX_LPCG_CLK_0>, - <&sdhc2_lpcg IMX_LPCG_CLK_5>; + <&sdhc2_lpcg IMX_LPCG_CLK_5>, + <&sdhc2_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_2>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index cab3468b1875..f7a91d43a0ff 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = ; interrupt-parent = <&gic>; - clocks = <&spi0_lpcg 0>, - <&spi0_lpcg 1>; + clocks = <&spi0_lpcg IMX_LPCG_CLK_0>, + <&spi0_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = ; interrupt-parent = <&gic>; - clocks = <&spi1_lpcg 0>, - <&spi1_lpcg 1>; + clocks = <&spi1_lpcg IMX_LPCG_CLK_0>, + <&spi1_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = ; interrupt-parent = <&gic>; - clocks = <&spi2_lpcg 0>, - <&spi2_lpcg 1>; + clocks = <&spi2_lpcg IMX_LPCG_CLK_0>, + <&spi2_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = ; interrupt-parent = <&gic>; - clocks = <&spi3_lpcg 0>, - <&spi3_lpcg 1>; + clocks = <&spi3_lpcg IMX_LPCG_CLK_0>, + <&spi3_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 { compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm"; reg = <0x5a190000 0x1000>; interrupts = ; - clocks = <&adma_pwm_lpcg 1>, - <&adma_pwm_lpcg 0>; + clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>, + <&adma_pwm_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a880000 0x10000>; interrupts = ; interrupt-parent = <&gic>; - clocks = <&adc0_lpcg 0>, - <&adc0_lpcg 1>; + clocks = <&adc0_lpcg IMX_LPCG_CLK_0>, + <&adc0_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a890000 0x10000>; interrupts = ; interrupt-parent = <&gic>; - clocks = <&adc1_lpcg 0>, - <&adc1_lpcg 1>; + clocks = <&adc1_lpcg IMX_LPCG_CLK_0>, + <&adc1_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a8d0000 0x10000>; interrupts = ; interrupt-parent = <&gic>; - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; @@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 { * CAN1 shares CAN0's clock and to enable CAN0's clock it * has to be powered on. */ - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; @@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 { * CAN2 shares CAN0's clock and to enable CAN0's clock it * has to be powered on. */ - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 7e510b21bbac..764c1a08e3b1 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d000000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm0_lpcg 4>, - <&pwm0_lpcg 1>; + clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>, + <&pwm0_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d010000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm1_lpcg 4>, - <&pwm1_lpcg 1>; + clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>, + <&pwm1_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d020000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm2_lpcg 4>, - <&pwm2_lpcg 1>; + clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>, + <&pwm2_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d030000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm3_lpcg 4>, - <&pwm3_lpcg 1>; + clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>, + <&pwm3_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi index 41c79d2ebdd6..f24b14744799 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi @@ -14,6 +14,7 @@ pinctrl-0 = <&pinctrl_usbcon1>; type = "micro"; label = "otg"; + vbus-supply = <®_usb1_vbus>; id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; port { @@ -183,7 +184,6 @@ }; &usb3_phy0 { - vbus-supply = <®_usb1_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index d5c400b355af..f5491a608b2f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -14,6 +14,7 @@ pinctrl-0 = <&pinctrl_usbcon1>; type = "micro"; label = "otg"; + vbus-supply = <®_usb1_vbus>; id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; port { @@ -202,7 +203,6 @@ }; &usb3_phy0 { - vbus-supply = <®_usb1_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi index 11626fae5f97..aa9f28c4431d 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi @@ -153,15 +153,15 @@ }; &flexcan2 { - clocks = <&can1_lpcg 1>, - <&can1_lpcg 0>; + clocks = <&can1_lpcg IMX_LPCG_CLK_4>, + <&can1_lpcg IMX_LPCG_CLK_0>; assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>; fsl,clk-source = /bits/ 8 <1>; }; &flexcan3 { - clocks = <&can2_lpcg 1>, - <&can2_lpcg 0>; + clocks = <&can2_lpcg IMX_LPCG_CLK_4>, + <&can2_lpcg IMX_LPCG_CLK_0>; assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>; fsl,clk-source = /bits/ 8 <1>; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index f3a6da8b2890..5260c63db007 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -944,6 +944,8 @@ ap_spi_fp: &spi10 { vddrf-supply = <&pp1300_l2c>; vddch0-supply = <&pp3300_l10c>; max-speed = <3200000>; + + qcom,local-bd-address-broken; }; }; diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 3b0e8248e1a4..a75de2665d84 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -161,12 +161,18 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only + * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If + * 'pages' is more than that, you must iterate over the overall + * range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + int __pages = min((pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (__pages >> (5 * (scale) + 1)) - 1; \ + }) /* * TLB Invalidation @@ -379,10 +385,6 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * 3. If there is 1 page remaining, flush it through non-range operations. Range * operations can only span an even number of pages. We save this for last to * ensure 64KB start alignment is maintained for the LPA2 case. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. */ #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user, lpa2) \ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ce08b744aaab..cb68adcabe07 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -289,8 +289,28 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) adr_l x1, __hyp_text_end adr_l x2, dcache_clean_poc blr x2 + + mov_q x0, INIT_SCTLR_EL2_MMU_OFF + pre_disable_mmu_workaround + msr sctlr_el2, x0 + isb 0: mov_q x0, HCR_HOST_NVHE_FLAGS + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be + * RES1 in that case. Publish the E2H bit early so that + * it can be picked up by the init_el2_state macro. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f + + orr x0, x0, #HCR_E2H +1: msr hcr_el2, x0 isb @@ -303,30 +323,16 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) mov_q x1, INIT_SCTLR_EL1_MMU_OFF - /* - * Compliant CPUs advertise their VHE-onlyness with - * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be - * RES1 in that case. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but - * don't advertise it (they predate this relaxation). - */ - mrs_s x0, SYS_ID_AA64MMFR4_EL1 - ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH - tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f - mrs x0, hcr_el2 and x0, x0, #HCR_E2H cbz x0, 2f -1: + /* Set a sane SCTLR_EL1, the VHE way */ - pre_disable_mmu_workaround msr_s SYS_SCTLR_EL12, x1 mov x2, #BOOT_CPU_FLAG_E2H b 3f 2: - pre_disable_mmu_workaround msr sctlr_el1, x1 mov x2, xzr 3: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 162b030ab9da..0d022599eb61 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -761,7 +761,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, { unsigned int vq; bool active; - bool fpsimd_only; enum vec_type task_type; memset(header, 0, sizeof(*header)); @@ -777,12 +776,10 @@ static void sve_init_header_from_task(struct user_sve_header *header, case ARM64_VEC_SVE: if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); break; case ARM64_VEC_SME: if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - fpsimd_only = false; break; default: WARN_ON_ONCE(1); @@ -790,7 +787,7 @@ static void sve_init_header_from_task(struct user_sve_header *header, } if (active) { - if (fpsimd_only) { + if (target->thread.fp_type == FP_STATE_FPSIMD) { header->flags |= SVE_PT_REGS_FPSIMD; } else { header->flags |= SVE_PT_REGS_SVE; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3dee5490eea9..c4a0a35e02c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void) if (err) goto out_hyp; - if (is_protected_kvm_enabled()) { - kvm_info("Protected nVHE mode initialized successfully\n"); - } else if (in_hyp_mode) { - kvm_info("VHE mode initialized successfully\n"); - } else { - char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n'; - kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode); - } + kvm_info("%s%sVHE mode initialized successfully\n", + in_hyp_mode ? "" : (is_protected_kvm_enabled() ? + "Protected " : "Hyp "), + in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ? + "h" : "n")); /* * FIXME: Do something reasonable if kvm_init() fails after pKVM diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index a60fb13e2192..2fc68da4036d 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt, false); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3fae5830f8d2..5a59ef88b646 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); + __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN); } else { if (ctx->end - ctx->addr < granule) return -EINVAL; @@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, * Perform the appropriate TLB invalidation based on the * evicted pte value (if any). */ - if (kvm_pte_table(ctx->old, ctx->level)) - kvm_tlb_flush_vmid_range(mmu, ctx->addr, - kvm_granule_size(ctx->level)); - else if (kvm_pte_valid(ctx->old)) + if (kvm_pte_table(ctx->old, ctx->level)) { + u64 size = kvm_granule_size(ctx->level); + u64 addr = ALIGN_DOWN(ctx->addr, size); + + kvm_tlb_flush_vmid_range(mmu, addr, size); + } else if (kvm_pte_valid(ctx->old)) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + } } if (stage2_pte_is_counted(ctx->old)) @@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - if (!stage2_unmap_defer_tlb_flush(pgt)) - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, - ctx->addr, ctx->level); + if (kvm_pte_table(ctx->old, ctx->level)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + TLBI_TTL_UNKNOWN); + } else if (!stage2_unmap_defer_tlb_flush(pgt)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + ctx->level); + } } mm_ops->put_page(ctx->ptep); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index b32e2940df7d..1a60b95381e8 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 18680771cdb0..dc04bc767865 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - if (esr_fsc_is_permission_fault(esr)) { + if (esr_fsc_is_translation_fault(esr)) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { kvm_inject_size_fault(vcpu); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 0f0e10bb0a95..b872b003a55f 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -276,7 +276,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *ptep = NULL; pgdp = pgd_offset(mm, addr); - p4dp = p4d_offset(pgdp, addr); + p4dp = p4d_alloc(mm, pgdp, addr); + if (!p4dp) + return NULL; + pudp = pud_alloc(mm, p4dp, addr); if (!pudp) return NULL; diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0c4e3ecf989d..0e270a1c51e6 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -219,9 +219,6 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); - if (!can_set_direct_map()) - return true; - pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c5b461dda438..122021f9bdfc 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -943,7 +943,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: - emit(A64_REV32(is64, dst, dst), ctx); + emit(A64_REV32(0, dst, dst), ctx); /* upper 32 bits already cleared */ break; case 64: @@ -1256,7 +1256,7 @@ emit_cond_jmp: } else { emit_a64_mov_i(1, tmp, off, ctx); if (sign_extend) - emit(A64_LDRSW(dst, src_adj, off_adj), ctx); + emit(A64_LDRSW(dst, src, tmp), ctx); else emit(A64_LDR32(dst, src, tmp), ctx); } diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 1140051a0c45..1150b77fa281 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -63,6 +63,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG ELF_DETAILS + .hexagon.attributes 0 : { *(.hexagon.attributes) } DISCARDS } diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi index 49a70f8c3cab..b6aeb1f70e2a 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -100,6 +100,13 @@ #size-cells = <2>; dma-coherent; + isa@18000000 { + compatible = "isa"; + #size-cells = <1>; + #address-cells = <2>; + ranges = <1 0x0 0x0 0x18000000 0x4000>; + }; + liointc0: interrupt-controller@1fe01400 { compatible = "loongson,liointc-2.0"; reg = <0x0 0x1fe01400 0x0 0x40>, diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts index dca91caf895e..74b99bd234cc 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts @@ -61,12 +61,45 @@ &gmac0 { status = "okay"; + + phy-mode = "gmii"; + phy-handle = <&phy0>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@0 { + reg = <2>; + }; + }; }; &gmac1 { status = "okay"; + + phy-mode = "gmii"; + phy-handle = <&phy1>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy1: ethernet-phy@1 { + reg = <2>; + }; + }; }; &gmac2 { status = "okay"; + + phy-mode = "rgmii"; + phy-handle = <&phy2>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy2: ethernet-phy@2 { + reg = <0>; + }; + }; }; diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi index a231949b5f55..9eab2d02cbe8 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi @@ -51,6 +51,13 @@ #address-cells = <2>; #size-cells = <2>; + isa@18400000 { + compatible = "isa"; + #size-cells = <1>; + #address-cells = <2>; + ranges = <1 0x0 0x0 0x18400000 0x4000>; + }; + pmc: power-management@100d0000 { compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon"; reg = <0x0 0x100d0000 0x0 0x58>; @@ -109,6 +116,8 @@ msi: msi-controller@1fe01140 { compatible = "loongson,pch-msi-1.0"; reg = <0x0 0x1fe01140 0x0 0x8>; + interrupt-controller; + #interrupt-cells = <1>; msi-controller; loongson,msi-base-vec = <64>; loongson,msi-num-vecs = <192>; @@ -140,27 +149,34 @@ #address-cells = <3>; #size-cells = <2>; device_type = "pci"; + msi-parent = <&msi>; bus-range = <0x0 0xff>; - ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>, + ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>, <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>; gmac0: ethernet@3,0 { reg = <0x1800 0x0 0x0 0x0 0x0>; - interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, + <13 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; gmac1: ethernet@3,1 { reg = <0x1900 0x0 0x0 0x0 0x0>; - interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, + <15 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; gmac2: ethernet@3,2 { reg = <0x1a00 0x0 0x0 0x0 0x0>; - interrupts = <17 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH>, + <18 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index b24437e28c6e..7bd47d65bf7a 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -11,6 +11,7 @@ #define _ASM_ADDRSPACE_H #include +#include #include diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 4a8adcca329b..c2f9979b2979 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -14,11 +14,6 @@ #include #include -/* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT) - extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); extern void __init early_iounmap(void __iomem *addr, unsigned long size); @@ -73,6 +68,21 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #define __io_aw() mmiowb() +#ifdef CONFIG_KFENCE +#define virt_to_phys(kaddr) \ +({ \ + (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) : \ + page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\ +}) + +#define phys_to_virt(paddr) \ +({ \ + extern char *__kfence_pool; \ + (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) : \ + page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\ +}) +#endif + #include #define ARCH_HAS_VALID_PHYS_ADDR_RANGE diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 6c82aea1c993..a6a5760da3a3 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -16,6 +16,7 @@ static inline bool arch_kfence_init_pool(void) { int err; + char *kaddr, *vaddr; char *kfence_pool = __kfence_pool; struct vm_struct *area; @@ -35,6 +36,14 @@ static inline bool arch_kfence_init_pool(void) return false; } + kaddr = kfence_pool; + vaddr = __kfence_pool; + while (kaddr < kfence_pool + KFENCE_POOL_SIZE) { + set_page_address(virt_to_page(kaddr), vaddr); + kaddr += PAGE_SIZE; + vaddr += PAGE_SIZE; + } + return true; } diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 44027060c54a..e85df33f11c7 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -78,7 +78,26 @@ typedef struct { unsigned long pgprot; } pgprot_t; struct page *dmw_virt_to_page(unsigned long kaddr); struct page *tlb_virt_to_page(unsigned long kaddr); -#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) +#define pfn_to_phys(pfn) __pfn_to_phys(pfn) +#define phys_to_pfn(paddr) __phys_to_pfn(paddr) + +#define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) +#define phys_to_page(paddr) pfn_to_page(phys_to_pfn(paddr)) + +#ifndef CONFIG_KFENCE + +#define page_to_virt(page) __va(page_to_phys(page)) +#define virt_to_page(kaddr) phys_to_page(__pa(kaddr)) + +#else + +#define WANT_PAGE_VIRTUAL + +#define page_to_virt(page) \ +({ \ + extern char *__kfence_pool; \ + (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page); \ +}) #define virt_to_page(kaddr) \ ({ \ @@ -86,6 +105,11 @@ struct page *tlb_virt_to_page(unsigned long kaddr); dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\ }) +#endif + +#define pfn_to_virt(pfn) page_to_virt(pfn_to_page(pfn)) +#define virt_to_pfn(kaddr) page_to_pfn(virt_to_page(kaddr)) + extern int __virt_addr_valid(volatile void *kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr)) diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index a9630a81b38a..89af7c12e8c0 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -4,6 +4,7 @@ */ #include #include +#include #include #include #include @@ -111,6 +112,9 @@ int __virt_addr_valid(volatile void *kaddr) { unsigned long vaddr = (unsigned long)kaddr; + if (is_kfence_address((void *)kaddr)) + return 1; + if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) return 0; diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 2aae72e63871..bda018150000 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -11,13 +11,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr) { - return pfn_to_page(virt_to_pfn(kaddr)); + return phys_to_page(__pa(kaddr)); } EXPORT_SYMBOL(dmw_virt_to_page); struct page *tlb_virt_to_page(unsigned long kaddr) { - return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); + return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr)))); } EXPORT_SYMBOL(tlb_virt_to_page); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 06ef440d16ce..516dc7022bd7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -619,15 +619,6 @@ config MACH_EYEQ5 bool -config FIT_IMAGE_FDT_EPM5 - bool "Include FDT for Mobileye EyeQ5 development platforms" - depends on MACH_EYEQ5 - default n - help - Enable this to include the FDT for the EyeQ5 development platforms - from Mobileye in the FIT kernel image. - This requires u-boot on the platform. - config MACH_NINTENDO64 bool "Nintendo 64 console" select CEVT_R4K @@ -1011,6 +1002,15 @@ config CAVIUM_OCTEON_SOC endchoice +config FIT_IMAGE_FDT_EPM5 + bool "Include FDT for Mobileye EyeQ5 development platforms" + depends on MACH_EYEQ5 + default n + help + Enable this to include the FDT for the EyeQ5 development platforms + from Mobileye in the FIT kernel image. + This requires u-boot on the platform. + source "arch/mips/alchemy/Kconfig" source "arch/mips/ath25/Kconfig" source "arch/mips/ath79/Kconfig" diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index d14d0e37ad02..4a2b40ce39e0 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs); #define exception_ip(regs) exception_ip(regs) #define profile_pc(regs) instruction_pointer(regs) -extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall); +extern asmlinkage long syscall_trace_enter(struct pt_regs *regs); extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); extern void die(const char *, struct pt_regs *) __noreturn; diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index d1b11f66f748..cb1045ebab06 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -101,6 +101,7 @@ void output_thread_info_defines(void) OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE_COUNT, thread_info, preempt_count); OFFSET(TI_REGS, thread_info, regs); + OFFSET(TI_SYSCALL, thread_info, syscall); DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 59288c13b581..61503a36067e 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request, * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ -asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) +asmlinkage long syscall_trace_enter(struct pt_regs *regs) { user_exit(); - current_thread_info()->syscall = syscall; - if (test_thread_flag(TIF_SYSCALL_TRACE)) { if (ptrace_report_syscall_entry(regs)) return -1; - syscall = current_thread_info()->syscall; } #ifdef CONFIG_SECCOMP @@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) struct seccomp_data sd; unsigned long args[6]; - sd.nr = syscall; + sd.nr = current_thread_info()->syscall; sd.arch = syscall_get_arch(current); syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) @@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) ret = __secure_computing(&sd); if (ret == -1) return ret; - syscall = current_thread_info()->syscall; } #endif if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[2]); - audit_syscall_entry(syscall, regs->regs[4], regs->regs[5], + audit_syscall_entry(current_thread_info()->syscall, + regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); /* * Negative syscall numbers are mistaken for rejected syscalls, but * won't have had the return value set appropriately, so we do so now. */ - if (syscall < 0) + if (current_thread_info()->syscall < 0) syscall_set_return_value(current, regs, -ENOSYS, 0); - return syscall; + return current_thread_info()->syscall; } /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 18dc9b345056..2c604717e630 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -77,6 +77,18 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + */ + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + lw t0, TI_FLAGS($28) # syscall tracing enabled? li t1, _TIF_WORK_SYSCALL_ENTRY and t0, t1 @@ -114,16 +126,7 @@ syscall_trace_entry: SAVE_STATIC move a0, sp - /* - * syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - */ - move a1, v0 - subu t2, v0, __NR_O32_Linux - bnez t2, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 97456b2ca7dc..97788859238c 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -72,7 +74,6 @@ syscall_common: n32_syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index e6264aa62e45..be11ea5cc67e 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -82,7 +84,6 @@ n64_syscall_exit: syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d3c2616cba22..7a5abb73e531 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -79,6 +79,22 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * absolute syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + * note: NR_syscall is the first O32 syscall but the macro is + * only defined when compiling with -mabi=32 (CONFIG_32BIT) + * therefore __NR_O32_Linux is used (4000) + */ + + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -113,22 +129,7 @@ trace_a_syscall: sd a7, PT_R11(sp) # For indirect syscalls move a0, sp - /* - * absolute syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - * note: NR_syscall is the first O32 syscall but the macro is - * only defined when compiling with -mabi=32 (CONFIG_32BIT) - * therefore __NR_O32_Linux is used (4000) - */ - .set push - .set reorder - subu t1, v0, __NR_O32_Linux - move a1, v0 - bnez t1, 1f /* __NR_syscall at offset 0 */ - ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ - .set pop - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index 8d98af5c7201..9a8393e6b4a8 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -21,7 +21,8 @@ void __init early_init_devtree(void *params) { - __be32 *dtb = (u32 *)__dtb_start; + __be32 __maybe_unused *dtb = (u32 *)__dtb_start; + #if defined(CONFIG_NIOS2_DTB_AT_PHYS_ADDR) if (be32_to_cpup((__be32 *)CONFIG_NIOS2_DTB_PHYS_ADDR) == OF_DT_HEADER) { @@ -30,8 +31,11 @@ void __init early_init_devtree(void *params) return; } #endif + +#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER) params = (void *)__dtb_start; +#endif early_init_dt_scan(params); } diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h new file mode 100644 index 000000000000..47c5a1991d10 --- /dev/null +++ b/arch/parisc/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include + +/* PARISC cannot allow mdwe as it needs writable stacks */ +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return false; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c index 74fb86b0d209..7c728755852e 100644 --- a/arch/powerpc/crypto/chacha-p10-glue.c +++ b/arch/powerpc/crypto/chacha-p10-glue.c @@ -197,6 +197,9 @@ static struct skcipher_alg algs[] = { static int __init chacha_p10_init(void) { + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + static_branch_enable(&have_p10); return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); @@ -204,10 +207,13 @@ static int __init chacha_p10_init(void) static void __exit chacha_p10_exit(void) { + if (!static_branch_likely(&have_p10)) + return; + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init); +module_init(chacha_p10_init); module_exit(chacha_p10_exit); MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)"); diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index f0a4cf01e85c..78302f6c2580 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -4,7 +4,6 @@ #ifndef __ASSEMBLY__ -#include #include #include #include @@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void); static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { - return (void *)vd + PAGE_SIZE; + return (void *)vd + (1U << CONFIG_PAGE_SHIFT); } #endif diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 1185efebf032..29a8c8e18585 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1285,15 +1285,14 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iommu_group *grp = iommu_group_get(dev); struct iommu_table_group *table_group; + struct iommu_group *grp; /* At first attach the ownership is already set */ - if (!domain) { - iommu_group_put(grp); + if (!domain) return 0; - } + grp = iommu_group_get(dev); table_group = iommu_group_get_iommudata(grp); /* * The domain being set to PLATFORM from earlier diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 252d63942f34..5b3115a19852 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -151,7 +151,7 @@ endif endif vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg -vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so +vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 97fcde30e247..9f8ea0e33eb1 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -593,6 +593,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } +#define pgprot_nx pgprot_nx +static inline pgprot_t pgprot_nx(pgprot_t _prot) +{ + return __pgprot(pgprot_val(_prot) & ~_PAGE_EXEC); +} + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t _prot) { diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index 980094c2e976..ac80216549ff 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -36,7 +36,8 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); ulong) \ __attribute__((alias(__stringify(___se_##prefix##name)))); \ __diag_pop(); \ - static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + __used; \ static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) #define SC_RISCV_REGS_TO_ARGS(x, ...) \ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index ec0cab9fbddd..72ec1d9bd3f3 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ if (unlikely(__kr_err)) \ @@ -328,7 +328,7 @@ do { \ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 10aaa83db89e..95050ebe9ad0 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -34,7 +34,7 @@ #define AT_L3_CACHEGEOMETRY 47 /* entries in ARCH_DLINFO */ -#define AT_VECTOR_SIZE_ARCH 9 +#define AT_VECTOR_SIZE_ARCH 10 #define AT_MINSIGSTKSZ 51 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 62fa393b2eb2..3df4cb788c1f 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -74,5 +74,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@ rm $@.tmp # actual build commands -quiet_cmd_compat_vdsoas = VDSOAS $@ +quiet_cmd_compat_vdsoas = VDSOAS $@ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 37e87fdcf6a0..30e12b310cab 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) */ lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return 0; } NOKPROBE_SYMBOL(__patch_insn_set); @@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (!riscv_patch_in_stop_machine) lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return ret; } NOKPROBE_SYMBOL(__patch_insn_write); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 92922dbd5b5c..e4bc61c4e58a 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -27,8 +27,6 @@ #include #include -register unsigned long gp_in_global __asm__("gp"); - #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include unsigned long __stack_chk_guard __read_mostly; @@ -37,7 +35,7 @@ EXPORT_SYMBOL(__stack_chk_guard); extern asmlinkage void ret_from_fork(void); -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { cpu_do_idle(); } @@ -207,7 +205,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 501e66debf69..5a2edd7f027e 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) struct __sc_riscv_v_state __user *state = sc_vec; void __user *datap; + /* + * Mark the vstate as clean prior performing the actual copy, + * to avoid getting the vstate incorrectly clobbered by the + * discarded vector state. + */ + riscv_v_vstate_set_restore(current, regs); + /* Copy everything of __sc_riscv_v_state except datap. */ err = __copy_from_user(¤t->thread.vstate, &state->v_state, offsetof(struct __riscv_v_ext_state, datap)); @@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) * Copy the whole vector content from user space datap. Use * copy_from_user to prevent information leak. */ - err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); - if (unlikely(err)) - return err; - - riscv_v_vstate_set_restore(current, regs); - - return err; + return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } #else #define save_v_state(task, regs) (0) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 868d6280cf66..05a16b1f0aee 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -122,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); - dump_instr(KERN_EMERG, regs); + dump_instr(KERN_INFO, regs); } force_sig_fault(signo, code, (void __user *)addr); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9b517fe1b8a8..272c431ac5b9 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -37,6 +37,7 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) +CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c index 39e72aa016a4..b467ba5ed910 100644 --- a/arch/riscv/kvm/aia_aplic.c +++ b/arch/riscv/kvm/aia_aplic.c @@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) raw_spin_lock_irqsave(&irqd->lock, flags); sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK; - if (!pending && - ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) || - (sm == APLIC_SOURCECFG_SM_LEVEL_LOW))) + if (sm == APLIC_SOURCECFG_SM_INACTIVE) goto skip_write_pending; + if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH || + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) { + if (!pending) + goto skip_write_pending; + if ((irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) + goto skip_write_pending; + if (!(irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) + goto skip_write_pending; + } + if (pending) irqd->state |= APLIC_IRQ_STATE_PENDING; else @@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) static bool aplic_read_input(struct aplic *aplic, u32 irq) { - bool ret; - unsigned long flags; + u32 sourcecfg, sm, raw_input, irq_inverted; struct aplic_irq *irqd; + unsigned long flags; + bool ret = false; if (!irq || aplic->nr_irqs <= irq) return false; irqd = &aplic->irqs[irq]; raw_spin_lock_irqsave(&irqd->lock, flags); - ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false; + + sourcecfg = irqd->sourcecfg; + if (sourcecfg & APLIC_SOURCECFG_D) + goto skip; + + sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; + if (sm == APLIC_SOURCECFG_SM_INACTIVE) + goto skip; + + raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0; + irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || + sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0; + ret = !!(raw_input ^ irq_inverted); + +skip: raw_spin_unlock_irqrestore(&irqd->lock, flags); return ret; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f4a6124d25c9..994adc26db4b 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -986,7 +986,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) { - return copy_isa_ext_reg_indices(vcpu, NULL);; + return copy_isa_ext_reg_indices(vcpu, NULL); } static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 893566e004b7..07d743f87b3f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -99,7 +99,7 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, +static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { @@ -200,7 +200,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID, + __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID, start, end - start, PAGE_SIZE); } diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index aac190085472..1adf2f39ce59 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1463,6 +1463,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, if (ret < 0) return ret; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + const struct btf_func_model *fm; + int idx; + + fm = bpf_jit_find_kfunc_model(ctx->prog, insn); + if (!fm) + return -EINVAL; + + for (idx = 0; idx < fm->nr_args; idx++) { + u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx); + + if (fm->arg_size[idx] == sizeof(int)) + emit_sextw(reg, reg, ctx); + } + } + ret = emit_call(addr, fixed_addr, ctx); if (ret) return ret; diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 7138d189cc42..0c4cad7d5a5b 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -15,31 +15,31 @@ #include #include -static inline int arch_atomic_read(const atomic_t *v) +static __always_inline int arch_atomic_read(const atomic_t *v) { return __atomic_read(v); } #define arch_atomic_read arch_atomic_read -static inline void arch_atomic_set(atomic_t *v, int i) +static __always_inline void arch_atomic_set(atomic_t *v, int i) { __atomic_set(v, i); } #define arch_atomic_set arch_atomic_set -static inline int arch_atomic_add_return(int i, atomic_t *v) +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter) + i; } #define arch_atomic_add_return arch_atomic_add_return -static inline int arch_atomic_fetch_add(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter); } #define arch_atomic_fetch_add arch_atomic_fetch_add -static inline void arch_atomic_add(int i, atomic_t *v) +static __always_inline void arch_atomic_add(int i, atomic_t *v) { __atomic_add(i, &v->counter); } @@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) #define ATOMIC_OPS(op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ +static __always_inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __atomic_##op(i, &v->counter); \ } \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ { \ return __atomic_##op##_barrier(i, &v->counter); \ } @@ -74,7 +74,7 @@ ATOMIC_OPS(xor) #define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return __atomic_cmpxchg(&v->counter, old, new); } @@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) #define ATOMIC64_INIT(i) { (i) } -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __atomic64_read(v); } #define arch_atomic64_read arch_atomic64_read -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __atomic64_set(v, i); } #define arch_atomic64_set arch_atomic64_set -static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter) + i; } #define arch_atomic64_add_return arch_atomic64_add_return -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline void arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __atomic64_add(i, (long *)&v->counter); } @@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v) #define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return __atomic64_cmpxchg((long *)&v->counter, old, new); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -#define ATOMIC64_OPS(op) \ -static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ -{ \ - __atomic64_##op(i, (long *)&v->counter); \ -} \ -static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ -{ \ - return __atomic64_##op##_barrier(i, (long *)&v->counter); \ +#define ATOMIC64_OPS(op) \ +static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ +{ \ + __atomic64_##op(i, (long *)&v->counter); \ +} \ +static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + return __atomic64_##op##_barrier(i, (long *)&v->counter); \ } ATOMIC64_OPS(and) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 50510e08b893..7fa5f96a553a 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -8,7 +8,7 @@ #ifndef __ARCH_S390_ATOMIC_OPS__ #define __ARCH_S390_ATOMIC_OPS__ -static inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const atomic_t *v) { int c; @@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v) return c; } -static inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(atomic_t *v, int i) { asm volatile( " st %1,%0\n" : "=R" (v->counter) : "d" (i)); } -static inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline s64 __atomic64_read(const atomic64_t *v) { s64 c; @@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v) return c; } -static inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(atomic64_t *v, s64 i) { asm volatile( " stg %1,%0\n" @@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES #define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \ -static inline op_type op_name(op_type val, op_type *ptr) \ +static __always_inline op_type op_name(op_type val, op_type *ptr) \ { \ op_type old; \ \ @@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi") #else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #define __ATOMIC_OP(op_name, op_string) \ -static inline int op_name(int val, int *ptr) \ +static __always_inline int op_name(int val, int *ptr) \ { \ int old, new; \ \ @@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr") #undef __ATOMIC_OPS #define __ATOMIC64_OP(op_name, op_string) \ -static inline long op_name(long val, long *ptr) \ +static __always_inline long op_name(long val, long *ptr) \ { \ long old, new; \ \ @@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -static inline int __atomic_cmpxchg(int *ptr, int old, int new) +static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new) { asm volatile( " cs %[old],%[new],%[ptr]" @@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new) return old; } -static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) +static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { int old_expected = old; @@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) return old == old_expected; } -static inline long __atomic64_cmpxchg(long *ptr, long old, long new) +static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) { asm volatile( " csg %[old],%[new],%[ptr]" @@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) +static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) { long old_expected = old; diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index bf15da0fedbc..0e3da500e98c 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -12,12 +12,12 @@ #define PREEMPT_NEED_RESCHED 0x80000000 #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED; } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { int old, new; @@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc) old, new) != old); } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED); } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { /* * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES @@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val) __atomic_add(val, &S390_lowcore.preempt_count); } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { __preempt_count_add(-val); } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return __atomic_add(-1, &S390_lowcore.preempt_count) == 1; } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(READ_ONCE(S390_lowcore.preempt_count) == preempt_offset); @@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset) #define PREEMPT_ENABLED (0) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count); } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { S390_lowcore.preempt_count = pc; } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return false; } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { S390_lowcore.preempt_count += val; } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { S390_lowcore.preempt_count -= val; } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return !--S390_lowcore.preempt_count && tif_need_resched(); } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(preempt_count() == preempt_offset && tif_need_resched()); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 787394978bc0..6a1e0fbbaa15 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -340,7 +340,8 @@ SYM_CODE_START(pgm_check_handler) mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK stctg %c1,%c1,__PT_CR1(%r11) #if IS_ENABLED(CONFIG_KVM) - lg %r12,__LC_GMAP + ltg %r12,__LC_GMAP + jz 5f clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) jne 5f BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST @@ -635,6 +636,7 @@ SYM_DATA_START_LOCAL(daton_psw) SYM_DATA_END(daton_psw) .section .rodata, "a" + .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) #include "asm/syscall_table.h" diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 823d652e3917..4ad472d130a3 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -90,7 +90,6 @@ static void paicrypt_event_destroy(struct perf_event *event) event->cpu); struct paicrypt_map *cpump = mp->mapptr; - cpump->event = NULL; static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" @@ -356,10 +355,15 @@ static int paicrypt_add(struct perf_event *event, int flags) static void paicrypt_stop(struct perf_event *event, int flags) { - if (!event->attr.sample_period) /* Counting */ + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ paicrypt_read(event); - else /* Sampling */ + } else { /* Sampling */ perf_sched_cb_dec(event->pmu); + cpump->event = NULL; + } event->hw.state = PERF_HES_STOPPED; } diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 616a25606cd6..a6da7e0cc7a6 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -122,7 +122,6 @@ static void paiext_event_destroy(struct perf_event *event) free_page(PAI_SAVE_AREA(event)); mutex_lock(&paiext_reserve_mutex); - cpump->event = NULL; if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); @@ -362,10 +361,15 @@ static int paiext_add(struct perf_event *event, int flags) static void paiext_stop(struct perf_event *event, int flags) { - if (!event->attr.sample_period) /* Counting */ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ paiext_read(event); - else /* Sampling */ + } else { /* Sampling */ perf_sched_cb_dec(event->pmu); + cpump->event = NULL; + } event->hw.state = PERF_HES_STOPPED; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index c421dd44ffbe..0c66b32e0f9f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -75,7 +75,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_PGSTE)) return KERNEL_FAULT; gmap = (struct gmap *)S390_lowcore.gmap; - if (regs->cr1 == gmap->asce) + if (gmap && gmap->asce == regs->cr1) return GMAP_FAULT; return KERNEL_FAULT; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b418333bb086..5af0402e94b8 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size) * PLT for hotpatchable calls. The calling convention is the same as for the * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered. */ -extern const char bpf_plt[]; -extern const char bpf_plt_ret[]; -extern const char bpf_plt_target[]; -extern const char bpf_plt_end[]; -#define BPF_PLT_SIZE 32 +struct bpf_plt { + char code[16]; + void *ret; + void *target; +} __packed; +extern const struct bpf_plt bpf_plt; asm( ".pushsection .rodata\n" " .balign 8\n" @@ -531,15 +532,14 @@ asm( " .balign 8\n" "bpf_plt_ret: .quad 0\n" "bpf_plt_target: .quad 0\n" - "bpf_plt_end:\n" " .popsection\n" ); -static void bpf_jit_plt(void *plt, void *ret, void *target) +static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { - memcpy(plt, bpf_plt, BPF_PLT_SIZE); - *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret; - *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret; + memcpy(plt, &bpf_plt, sizeof(*plt)); + plt->ret = ret; + plt->target = target; } /* @@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) jit->prg = ALIGN(jit->prg, 8); jit->prologue_plt = jit->prg; if (jit->prg_buf) - bpf_jit_plt(jit->prg_buf + jit->prg, + bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg), jit->prg_buf + jit->prologue_plt_ret, NULL); - jit->prg += BPF_PLT_SIZE; + jit->prg += sizeof(struct bpf_plt); } static int get_probe_mem_regno(const u8 *insn) @@ -2040,9 +2040,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_jit jit; int pass; - if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE)) - return orig_fp; - if (!fp->jit_requested) return orig_fp; @@ -2148,14 +2145,11 @@ bool bpf_jit_supports_far_kfunc_call(void) int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { + struct bpf_plt expected_plt, current_plt, new_plt, *plt; struct { u16 opc; s32 disp; } __packed insn; - char expected_plt[BPF_PLT_SIZE]; - char current_plt[BPF_PLT_SIZE]; - char new_plt[BPF_PLT_SIZE]; - char *plt; char *ret; int err; @@ -2174,18 +2168,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, */ } else { /* Verify the PLT. */ - plt = (char *)ip + (insn.disp << 1); - err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE); + plt = ip + (insn.disp << 1); + err = copy_from_kernel_nofault(¤t_plt, plt, + sizeof(current_plt)); if (err < 0) return err; ret = (char *)ip + 6; - bpf_jit_plt(expected_plt, ret, old_addr); - if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE)) + bpf_jit_plt(&expected_plt, ret, old_addr); + if (memcmp(¤t_plt, &expected_plt, sizeof(current_plt))) return -EINVAL; /* Adjust the call address. */ - bpf_jit_plt(new_plt, ret, new_addr); - s390_kernel_write(plt + (bpf_plt_target - bpf_plt), - new_plt + (bpf_plt_target - bpf_plt), + bpf_jit_plt(&new_plt, ret, new_addr); + s390_kernel_write(&plt->target, &new_plt.target, sizeof(void *)); } diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 6a1f36df6a18..cf0ad89f5639 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -28,7 +28,7 @@ obj-y += net/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ -obj-y += virt/svm/ +obj-y += virt/ # for cleaning subdir- += boot tools diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 39886bab943a..4474bf32d0a4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2439,6 +2439,8 @@ config USE_X86_SEG_SUPPORT # with named address spaces - see GCC PR sanitizer/111736. # depends on !KASAN + # -fsanitize=thread (KCSAN) is also incompatible. + depends on !KCSAN config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) @@ -2631,6 +2633,16 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also +config MITIGATION_SPECTRE_BHI + bool "Mitigate Spectre-BHB (Branch History Injection)" + depends on CPU_SUP_INTEL + default y + help + Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks + where the branch history buffer is poisoned to speculatively steer + indirect branches. + See + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 662d9d4033e6..5ab93fcdd691 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -251,8 +251,6 @@ archheaders: libs-y += arch/x86/lib/ -core-y += arch/x86/virt/ - # drivers-y are linked after core-y drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 719e939050cb..876fc6d46a13 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -15,10 +15,12 @@ */ #include +#include #include #include #include #include +#include .code64 .text @@ -149,6 +151,7 @@ SYM_FUNC_END(__efi64_thunk) SYM_FUNC_START(efi32_stub_entry) call 1f 1: popl %ecx + leal (efi32_boot_args - 1b)(%ecx), %ebx /* Clear BSS */ xorl %eax, %eax @@ -163,6 +166,7 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + movl %esi, 8(%ebx) jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) #endif @@ -239,8 +243,6 @@ SYM_FUNC_END(efi_enter32) * * Arguments: %ecx image handle * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) * * Since this is the point of no return for ordinary execution, no registers * are considered live except for the function parameters. [Note that the EFI @@ -266,9 +268,18 @@ SYM_FUNC_START_LOCAL(efi32_entry) leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) movl %edx, 4(%ebx) - movl %esi, 8(%ebx) movb $0x0, 12(%ebx) // efi_is64 + /* + * Allocate some memory for a temporary struct boot_params, which only + * needs the minimal pieces that startup_32() relies on. + */ + subl $PARAM_SIZE, %esp + movl %esp, %esi + movl $PAGE_SIZE, BP_kernel_alignment(%esi) + movl $_end - 1b, BP_init_size(%esi) + subl $startup_32 - 1b, BP_init_size(%esi) + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -294,8 +305,7 @@ SYM_FUNC_START(efi32_pe_entry) movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi + jmp efi32_entry // pass %ecx, %edx // no other registers remain live 2: popl %edi // restore callee-save registers diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index d07be9d05cd0..b31ef2424d19 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -3,19 +3,28 @@ * Confidential Computing Platform Capability checks * * Copyright (C) 2021 Advanced Micro Devices, Inc. + * Copyright (C) 2024 Jason A. Donenfeld . All Rights Reserved. * * Author: Tom Lendacky */ #include #include +#include +#include +#include #include #include enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; u64 cc_mask __ro_after_init; +static struct cc_attr_flags { + __u64 host_sev_snp : 1, + __resv : 63; +} cc_flags; + static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { @@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_GUEST_SEV_SNP: return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + case CC_ATTR_HOST_SEV_SNP: + return cc_flags.host_sev_snp; + default: return false; } @@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); + +static void amd_cc_platform_clear(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 0; + break; + default: + break; + } +} + +void cc_platform_clear(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_clear(attr); + break; + default: + break; + } +} + +static void amd_cc_platform_set(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 1; + break; + default: + break; + } +} + +void cc_platform_set(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_set(attr); + break; + default: + break; + } +} + +__init void cc_random_init(void) +{ + /* + * The seed is 32 bytes (in units of longs), which is 256 bits, which + * is the security level that the RNG is targeting. + */ + unsigned long rng_seed[32 / sizeof(long)]; + size_t i, longs; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * Since the CoCo threat model includes the host, the only reliable + * source of entropy that can be neither observed nor manipulated is + * RDRAND. Usually, RDRAND failure is considered tolerable, but since + * CoCo guests have no other unobservable source of entropy, it's + * important to at least ensure the RNG gets some initial random seeds. + */ + for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { + longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); + + /* + * A zero return value means that the guest doesn't have RDRAND + * or the CPU is physically broken, and in both cases that + * means most crypto inside of the CoCo instance will be + * broken, defeating the purpose of CoCo in the first place. So + * just panic here because it's absolutely unsafe to continue + * executing. + */ + if (longs == 0) + panic("RDRAND is defective."); + } + add_device_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); +} diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6356060caaf3..51cc9c7cb9bd 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) if (likely(unr < NR_syscalls)) { unr = array_index_nospec(unr, NR_syscalls); - regs->ax = sys_call_table[unr](regs); + regs->ax = x64_sys_call(regs, unr); return true; } return false; @@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call_table[xnr](regs); + regs->ax = x32_sys_call(regs, xnr); return true; } return false; @@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) if (likely(unr < IA32_NR_syscalls)) { unr = array_index_nospec(unr, IA32_NR_syscalls); - regs->ax = ia32_sys_call_table[unr](regs); + regs->ax = ia32_sys_call(regs, unr); } else if (nr != -1) { regs->ax = __ia32_sys_ni_syscall(regs); } @@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void) } /** - * int80_emulation - 32-bit legacy syscall entry + * do_int80_emulation - 32-bit legacy syscall C entry from asm * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in @@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void) * eax: system call number * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 */ -DEFINE_IDTENTRY_RAW(int80_emulation) +__visible noinstr void do_int80_emulation(struct pt_regs *regs) { int nr; @@ -255,6 +255,71 @@ DEFINE_IDTENTRY_RAW(int80_emulation) instrumentation_end(); syscall_exit_to_user_mode(regs); } + +#ifdef CONFIG_X86_FRED +/* + * A FRED-specific INT80 handler is warranted for the follwing reasons: + * + * 1) As INT instructions and hardware interrupts are separate event + * types, FRED does not preclude the use of vector 0x80 for external + * interrupts. As a result, the FRED setup code does not reserve + * vector 0x80 and calling int80_is_external() is not merely + * suboptimal but actively incorrect: it could cause a system call + * to be incorrectly ignored. + * + * 2) It is called only for handling vector 0x80 of event type + * EVENT_TYPE_SWINT and will never be called to handle any external + * interrupt (event type EVENT_TYPE_EXTINT). + * + * 3) FRED has separate entry flows depending on if the event came from + * user space or kernel space, and because the kernel does not use + * INT insns, the FRED kernel entry handler fred_entry_from_kernel() + * falls through to fred_bad_type() if the event type is + * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling + * an INT insn, it can only be from a user level. + * + * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will + * likely take a different approach if it is ever needed: it + * probably belongs in either fred_intx()/ fred_other() or + * asm_fred_entrypoint_user(), depending on if this ought to be done + * for all entries from userspace or only system + * calls. + * + * 5) INT $0x80 is the fast path for 32-bit system calls under FRED. + */ +DEFINE_FREDENTRY_RAW(int80_emulation) +{ + int nr; + + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* + * FRED pushed 0 into regs::orig_ax and regs::ax contains the + * syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#endif #else /* CONFIG_IA32_EMULATION */ /* Handles int $0x80 on a 32bit kernel */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8af2a26b24f6..1b5be07f8669 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* clobbers %rax, make sure it is after saving the syscall nr */ IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY call do_syscall_64 /* returns with IRQs disabled */ @@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) call make_task_dead SYM_CODE_END(rewind_stack_and_make_dead) .popsection + +/* + * This sequence executes branches in order to remove user branch information + * from the branch history tracker in the Branch Predictor, therefore removing + * user influence on subsequent BTB lookups. + * + * It should be used on parts prior to Alder Lake. Newer parts should use the + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being + * virtualized on newer hardware the VMM should protect against BHI attacks by + * setting BHI_DIS_S for the guests. + * + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging + * and not clearing the branch history. The call tree looks like: + * + * call 1 + * call 2 + * call 2 + * call 2 + * call 2 + * call 2 + * ret + * ret + * ret + * ret + * ret + * ret + * + * This means that the stack is non-constant and ORC can't unwind it with %rsp + * alone. Therefore we unconditionally set up the frame pointer, which allows + * ORC to unwind properly. + * + * The alignment is for performance and not for safety, and may be safely + * refactored in the future if needed. + */ +SYM_FUNC_START(clear_bhb_loop) + push %rbp + mov %rsp, %rbp + movl $5, %ecx + ANNOTATE_INTRA_FUNCTION_CALL + call 1f + jmp 5f + .align 64, 0xcc + ANNOTATE_INTRA_FUNCTION_CALL +1: call 2f + RET + .align 64, 0xcc +2: movl $5, %eax +3: jmp 4f + nop +4: sub $1, %eax + jnz 3b + sub $1, %ecx + jnz 1b + RET +5: lfence + pop %rbp + RET +SYM_FUNC_END(clear_bhb_loop) +EXPORT_SYMBOL_GPL(clear_bhb_loop) +STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index eabf48c4d4b4..c779046cc3fe 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY /* * SYSENTER doesn't filter flags, so we need to clear NT and AC @@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_fast_syscall_32 @@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) + +/* + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries + * point to C routines, however since this is a system call interface the branch + * history needs to be scrubbed to protect against BHI attacks, and that + * scrubbing needs to take place in assembly code prior to entering any C + * routines. + */ +SYM_CODE_START(int80_emulation) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + CLEAR_BRANCH_HISTORY + jmp do_int80_emulation +SYM_CODE_END(int80_emulation) diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index ac120cbdaaf2..89c1476fcdd9 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -28,9 +28,9 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code if (regs->fred_cs.sl > 0) { pr_emerg("PANIC: invalid or fatal FRED event; event type %u " "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n", - regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax, + regs->fred_ss.type, regs->fred_ss.vector, error_code, fred_event_data(regs), regs->cs, regs->ip); - die("invalid or fatal FRED event", regs, regs->orig_ax); + die("invalid or fatal FRED event", regs, error_code); panic("invalid or fatal FRED event"); } else { unsigned long flags = oops_begin(); @@ -38,10 +38,10 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code pr_alert("BUG: invalid or fatal FRED event; event type %u " "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n", - regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax, + regs->fred_ss.type, regs->fred_ss.vector, error_code, fred_event_data(regs), regs->cs, regs->ip); - if (__die("Invalid or fatal FRED event", regs, regs->orig_ax)) + if (__die("Invalid or fatal FRED event", regs, error_code)) sig = 0; oops_end(flags, regs, sig); @@ -66,7 +66,7 @@ static noinstr void fred_intx(struct pt_regs *regs) /* INT80 */ case IA32_SYSCALL_VECTOR: if (ia32_enabled()) - return int80_emulation(regs); + return fred_int80_emulation(regs); fallthrough; #endif diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cfc9bc73e7f..c2235bae17ef 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -18,8 +18,25 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ +#ifdef CONFIG_X86_32 #define __SYSCALL(nr, sym) __ia32_##sym, - -__visible const sys_call_ptr_t ia32_sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL +#endif + +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __ia32_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index be120eec1fc9..33b3f09e6f15 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -11,8 +11,23 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ #define __SYSCALL(nr, sym) __x64_##sym, - -asmlinkage const sys_call_ptr_t sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL + +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index bdd0e03a1265..03de4a932131 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -11,8 +11,12 @@ #include #undef __SYSCALL -#define __SYSCALL(nr, sym) __x64_##sym, +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { -#include +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } }; diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fd63051bbbbb..3d64bcc403cf 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n +OBJECT_FILES_NON_STANDARD_vdso-image-x32.o := n OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index aec16e581f5b..985ef3b47919 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = /* * AMD Performance Monitor Family 17h and later: */ -static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, @@ -262,10 +262,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; +static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, +}; + +static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, + [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120, +}; + static u64 amd_pmu_event_map(int hw_event) { - if (boot_cpu_data.x86 >= 0x17) - return amd_f17h_perfmon_event_map[hw_event]; + if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a) + return amd_zen4_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19) + return amd_zen2_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN1)) + return amd_zen1_perfmon_event_map[hw_event]; return amd_perfmon_event_map[hw_event]; } @@ -904,8 +933,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; - /* Read branch records before unfreezing */ - if (status & GLOBAL_STATUS_LBRS_FROZEN) { + /* Read branch records */ + if (x86_pmu.lbr_nr) { amd_pmu_lbr_read(); status &= ~GLOBAL_STATUS_LBRS_FROZEN; } diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 4a1e600314d5..5149830c7c4f 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void) wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); } - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } @@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void) return; rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } } __init int amd_pmu_lbr_init(void) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 09050641ce5d..5b0dd07b1ef1 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + cpuc->assign[i-1] = cpuc->assign[i]; } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2641ba620f12..e010bfed8417 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu = event->pmu; /* - * Make sure we get updated with the first PEBS - * event. It will trigger also during removal, but - * that does not hurt: + * Make sure we get updated with the first PEBS event. + * During removal, ->pebs_data_cfg is still valid for + * the last PEBS event. Don't clear it. */ - if (cpuc->n_pebs == 1) + if ((cpuc->n_pebs == 1) && add) cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; if (needed_cb != pebs_needs_sched_cb(cpuc)) { diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 78cd5084104e..4367aa77cb8d 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) lbr->from = x86_pmu.lbr_from; lbr->to = x86_pmu.lbr_to; lbr->info = x86_pmu.lbr_info; + lbr->has_callstack = x86_pmu_has_lbr_callstack(); } EXPORT_SYMBOL_GPL(x86_perf_get_lbr); diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5fc45543e955..0569f579338b 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu) * IPI implementation on Hyper-V. */ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, - bool exclude_self) + bool exclude_self) { struct hv_send_ipi_ex *ipi_arg; unsigned long flags; @@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) { ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask, - exclude_self ? cpu_is_self : NULL); + nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask, + exclude_self ? cpu_is_self : NULL); /* * 'nr_bank <= 0' means some CPUs in cpumask can't be @@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, } status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, - ipi_arg, NULL); + ipi_arg, NULL); ipi_mask_ex_done: local_irq_restore(flags); @@ -155,7 +155,7 @@ ipi_mask_ex_done: } static bool __send_ipi_mask(const struct cpumask *mask, int vector, - bool exclude_self) + bool exclude_self) { int cur_cpu, vcpu, this_cpu = smp_processor_id(); struct hv_send_ipi ipi_arg; @@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, return false; } - if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR) return false; /* @@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, } status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, - ipi_arg.cpu_mask); + ipi_arg.cpu_mask); return hv_result_success(status); do_ex_hypercall: @@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector) return false; } - if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR) return false; if (vp >= 64) diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c index 68a0843d4750..3fa1f2ee7b0d 100644 --- a/arch/x86/hyperv/hv_proc.c +++ b/arch/x86/hyperv/hv_proc.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -116,12 +115,11 @@ free_buf: int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) { - struct hv_add_logical_processor_in *input; - struct hv_add_logical_processor_out *output; + struct hv_input_add_logical_processor *input; + struct hv_output_add_logical_processor *output; u64 status; unsigned long flags; int ret = HV_STATUS_SUCCESS; - int pxm = node_to_pxm(node); /* * When adding a logical processor, the hypervisor may return @@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) input->lp_index = lp_index; input->apic_id = apic_id; - input->flags = 0; - input->proximity_domain_info.domain_id = pxm; - input->proximity_domain_info.flags.reserved = 0; - input->proximity_domain_info.flags.proximity_info_valid = 1; - input->proximity_domain_info.flags.proximity_preferred = 1; + input->proximity_domain_info = hv_numa_node_to_pxm_info(node); status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, input, output); local_irq_restore(flags); @@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) u64 status; unsigned long irq_flags; int ret = HV_STATUS_SUCCESS; - int pxm = node_to_pxm(node); /* Root VPs don't seem to need pages deposited */ if (partition_id != hv_current_partition_id) { @@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) input->vp_index = vp_index; input->flags = flags; input->subnode_type = HvSubnodeAny; - if (node != NUMA_NO_NODE) { - input->proximity_domain_info.domain_id = pxm; - input->proximity_domain_info.flags.reserved = 0; - input->proximity_domain_info.flags.proximity_info_valid = 1; - input->proximity_domain_info.flags.proximity_preferred = 1; - } else { - input->proximity_domain_info.as_uint64 = 0; - } + input->proximity_domain_info = hv_numa_node_to_pxm_info(node); status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); local_irq_restore(irq_flags); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index fcd20c6dc7f9..67b68d0d17d1 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); -extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip); #else static __always_inline void callthunks_patch_builtin_calls(void) {} static __always_inline void @@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest) return dest; } static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, - void *func) + void *func, void *ip) { return 0; } diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 94ce0f7c9d3a..e6ab0cf15ed5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -13,6 +13,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); } static inline void native_apic_mem_eoi(void) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 076bf8dee702..25466c4d2134 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index fe1e7e3cc844..63bdc6b85219 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -79,6 +79,9 @@ do { \ #define __smp_mb__before_atomic() do { } while (0) #define __smp_mb__after_atomic() do { } while (0) +/* Writing to CR3 provides a full memory barrier in switch_mm(). */ +#define smp_mb__after_switch_mm() do { } while (0) + #include #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index fb7388bbc212..c086699b0d0c 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask) u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); +void cc_random_init(void); #else #define cc_vendor (CC_VENDOR_NONE) @@ -34,6 +35,7 @@ static inline u64 cc_mkdec(u64 val) { return val; } +static inline void cc_random_init(void) { } #endif #endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a1273698fc43..686e92d2663e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" @@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f0337f7bcf16..3c7434329661 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -459,6 +459,18 @@ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc and Linux defined features. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ + /* * BUG word(s) */ @@ -507,4 +519,5 @@ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h index 152239f95541..7835b2cdff04 100644 --- a/arch/x86/include/asm/crash_reserve.h +++ b/arch/x86/include/asm/crash_reserve.h @@ -39,4 +39,6 @@ static inline unsigned long crash_low_size_default(void) #endif } +#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + #endif /* _X86_CRASH_RESERVE_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index da4054fbf533..c492bdc97b05 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -155,6 +155,7 @@ #define DISABLED_MASK18 (DISABLE_IBT) #define DISABLED_MASK19 (DISABLE_SEV_SNP) #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 16e07a2eee19..6efd1497b026 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -855,6 +855,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; struct kvm_hypervisor_cpuid kvm_cpuid; + bool is_amd_compatible; /* * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 05956bd8bacf..e72c2b872957 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -61,10 +61,13 @@ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) /* A mask for bits which the kernel toggles when controlling mitigations */ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ - | SPEC_CTRL_RRSBA_DIS_S) + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -163,6 +166,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index fc3a8a3c7ffe..ff5f1ecc7d1e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -262,11 +262,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the @@ -282,8 +291,8 @@ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns #if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) VALIDATE_UNRET_END - ALTERNATIVE_3 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET + ALTERNATIVE_2 "", \ "call entry_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif @@ -317,6 +326,19 @@ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF .endm +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT +.endm +#else +#define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT +#endif + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -342,6 +364,8 @@ extern void retbleed_return_thunk(void); static inline void retbleed_return_thunk(void) {} #endif +extern void srso_alias_untrain_ret(void); + #ifdef CONFIG_MITIGATION_SRSO extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); @@ -357,6 +381,10 @@ extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + extern void (*x86_return_thunk)(void); extern void __warn_thunk(void); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 3736b8a46c04..7f1e17250546 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -555,6 +555,7 @@ struct x86_pmu_lbr { unsigned int from; unsigned int to; unsigned int info; + bool has_callstack; }; extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 7ba1726b71c7..e9187ddd3d1f 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -99,6 +99,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 9477b4053bce..7f57382afee4 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -218,17 +218,16 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages); void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages); -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); +void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); -void kdump_sev_callback(void); void sev_show_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } @@ -244,12 +243,12 @@ static inline void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } -static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } +static inline void snp_dmi_setup(void) { } static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; @@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } -static inline void kdump_sev_callback(void) { } static inline void sev_show_status(void) { } #endif @@ -270,6 +268,7 @@ int psmash(u64 pfn); int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable); int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); +void kdump_sev_callback(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } @@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as } static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} +static inline void kdump_sev_callback(void) { } #endif #endif diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index f44e2f9ab65d..2fc7bc3863ff 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,19 +16,17 @@ #include /* for TS_COMPAT */ #include +/* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; -#if defined(CONFIG_X86_32) -#define ia32_sys_call_table sys_call_table -#else /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ -extern const sys_call_ptr_t ia32_sys_call_table[]; -extern const sys_call_ptr_t x32_sys_call_table[]; -#endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. @@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task) } bool do_syscall_64(struct pt_regs *regs, int nr); +void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b89b40f250e6..6149eabe200f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -30,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ad29984d5e39..ef11aa4cab42 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -694,6 +694,7 @@ enum sev_cmd_id { struct kvm_sev_cmd { __u32 id; + __u32 pad0; __u64 data; __u32 error; __u32 sev_fd; @@ -704,28 +705,35 @@ struct kvm_sev_launch_start { __u32 policy; __u64 dh_uaddr; __u32 dh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_launch_update_data { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_launch_secret { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_launch_measure { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_guest_status { @@ -738,33 +746,43 @@ struct kvm_sev_dbg { __u64 src_uaddr; __u64 dst_uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_attestation_report { __u8 mnonce[16]; __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_send_start { __u32 policy; + __u32 pad0; __u64 pdh_cert_uaddr; __u32 pdh_cert_len; + __u32 pad1; __u64 plat_certs_uaddr; __u32 plat_certs_len; + __u32 pad2; __u64 amd_certs_uaddr; __u32 amd_certs_len; + __u32 pad3; __u64 session_uaddr; __u32 session_len; + __u32 pad4; }; struct kvm_sev_send_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_receive_start { @@ -772,17 +790,22 @@ struct kvm_sev_receive_start { __u32 policy; __u64 pdh_uaddr; __u32 pdh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_receive_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6bc3456a8ebf..a1efa7907a0b 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { __u32 token; __u8 pad[56]; - __u32 enabled; }; #define KVM_PV_EOI_BIT 0 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a42d8a6f7149..c342c4aa9c68 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1687,11 +1687,11 @@ static int x2apic_state; static bool x2apic_hw_locked(void) { - u64 ia32_cap; + u64 x86_arch_cap_msr; u64 msr; - ia32_cap = x86_read_arch_cap_msr(); - if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) { + x86_arch_cap_msr = x86_read_arch_cap_msr(); + if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) { rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr); return (msr & LEGACY_XAPIC_DISABLED); } diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 30335182b6b0..e92ff0c11db8 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -314,7 +314,7 @@ static bool is_callthunk(void *addr) return !bcmp(pad, insn_buff, tmpl_size); } -int x86_call_depth_emit_accounting(u8 **pprog, void *func) +int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip) { unsigned int tmpl_size = SKL_TMPL_SIZE; u8 insn_buff[MAX_PATCH_LEN]; @@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) return 0; memcpy(insn_buff, skl_call_thunk_template, tmpl_size); - apply_relocation(insn_buff, tmpl_size, *pprog, + apply_relocation(insn_buff, tmpl_size, ip, skl_call_thunk_template, tmpl_size); memcpy(*pprog, insn_buff, tmpl_size); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6d8677e80ddb..cb9eece55904 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } +static void bsp_determine_snp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + cc_vendor = CC_VENDOR_AMD; + + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and is defined by the + * per-processor PPR. Restrict SNP support on the known CPU models + * for which the RMP table entry format is currently defined for. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + c->x86 >= 0x19 && snp_probe_rmptable_info()) { + cc_platform_set(CC_ATTR_HOST_SEV_SNP); + } else { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + } + } +#endif +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } - if (cpu_has(c, X86_FEATURE_SEV_SNP)) { - /* - * RMP table entry format is not architectural and it can vary by processor - * and is defined by the per-processor PPR. Restrict SNP support on the - * known CPU model and family for which the RMP table entry format is - * currently defined for. - */ - if (!boot_cpu_has(X86_FEATURE_ZEN3) && - !boot_cpu_has(X86_FEATURE_ZEN4) && - !boot_cpu_has(X86_FEATURE_ZEN5)) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - else if (!snp_probe_rmptable_info()) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - } - + bsp_determine_snp(c); return; warn: @@ -527,7 +535,6 @@ clear_sev: static void early_init_amd(struct cpuinfo_x86 *c) { - u64 value; u32 dummy; if (c->x86 >= 0xf) @@ -595,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); - /* Re-enable TopologyExtensions if switched off by BIOS */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x6f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (msr_set_bit(0xc0011005, 54) > 0) { - rdmsrl(0xc0011005, value); - if (value & BIT_64(54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); - } - } - } - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e7ba936d798b..ab18185894df 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init x86_arch_cap_msr; + static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; @@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + x86_arch_cap_msr = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -301,8 +305,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - ia32_cap = x86_read_arch_cap_msr(); - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -401,8 +402,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. @@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. */ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -659,8 +656,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + if (rrsba_disabled) + return; + + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; + return; + } if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) return; - ia32_cap = x86_read_arch_cap_msr(); - - if (ia32_cap & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1607,6 +1606,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + + /* Mitigate syscalls when the mitigation is forced =on */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1718,6 +1785,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -1832,8 +1902,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1848,7 +1916,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2695,15 +2763,15 @@ static char *stibp_state(void) switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2712,10 +2780,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2725,14 +2793,32 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } +static const char *spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && + rrsba_disabled) + return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; + + return "; BHI: Vulnerable"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2745,13 +2831,15 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ spectre_v2_module_string()); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5c1e6d6be267..605c26c009c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; @@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); } -static bool __init vulnerable_to_rfds(u64 ia32_cap) +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) { /* The "immunity" bit trumps everything else: */ - if (ia32_cap & ARCH_CAP_RFDS_NO) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) return false; /* @@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) * indicate that mitigation is needed because guest is running on a * vulnerable hardware or may migrate to such hardware: */ - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) return true; /* Only consult the blacklist when there is no enumeration: */ @@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Don't use AutoIBRS when SNP is enabled because it degrades host * userspace indirect branch performance. */ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || (cpu_has(c, X86_FEATURE_AUTOIBRS) && !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } @@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); - if (vulnerable_to_rfds(ia32_cap)) + if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index b7174209d855..946813d816bf 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_VAES, X86_FEATURE_AVX }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, @@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b5cc557cfc37..84d41be6d06b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 422a4ddc2ab7..7b29ebda024f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; rdmsr(MSR_AMD64_SYSCFG, lo, hi); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c99f26ebe7a6..1a8687f8073a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) else cpu = cpumask_any_but(mask, exclude_cpu); - if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) + /* Only continue if tick_nohz_full_mask has been initialized. */ + if (!tick_nohz_full_enabled()) return cpu; /* If the CPU picked isn't marked nohz_full nothing more needs doing. */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 0dad49a09b7a..af5aa2c754c2 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, @@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index aaca8d235dc2..d17c9b71eb4a 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; #endif - set_cpu_possible(cpu, true); set_cpu_present(cpu, true); } @@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) topo_info.nr_disabled_cpus++; } - /* Register present and possible CPUs in the domain maps */ + /* + * Register present and possible CPUs in the domain + * maps. cpu_possible_map will be updated in + * topology_init_possible_cpus() after enumeration is done. + */ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); } diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 1a8b3ad493af..a7aa6eff4ae5 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan) if (!sft) sft = get_count_order(ecx.cpu_nthreads + 1); - topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1); + /* + * cpu_nthreads describes the number of threads in the package + * sft is the number of APIC ID bits per package + * + * As the number of actual threads per core is not described in + * this leaf, just set the CORE domain shift and let the later + * parsers set SMT shift. Assume one thread per core by default + * which is correct if there are no other CPUID leafs to parse. + */ + topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1); return true; } -static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id) +static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id) { /* * Starting with Fam 17h the DIE domain could probably be used to @@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) tscan->c->topo.initial_apicid = leaf.ext_apic_id; /* - * If leaf 0xb is available, then SMT shift is set already. If not - * take it from ecx.threads_per_core and use topo_update_dom() - - * topology_set_dom() would propagate and overwrite the already - * propagated CORE level. + * If leaf 0xb is available, then the domain shifts are set + * already and nothing to do here. */ if (!has_0xb) { + /* + * Leaf 0x80000008 set the CORE domain shift already. + * Update the SMT domain, but do not propagate it. + */ unsigned int nthreads = leaf.core_nthreads + 1; topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); @@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) static bool parse_fam10h_node_id(struct topo_scan *tscan) { - struct { - union { + union { + struct { u64 node_id : 3, nodes_per_pkg : 3, unused : 58; - u64 msr; }; + u64 msr; } nid; if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) @@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan) tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; } +static void topoext_fixup(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u64 msrval; + + /* Try to re-enable TopologyExtensions if switched off by BIOS */ + if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f) + return; + + if (msr_set_bit(0xc0011005, 54) <= 0) + return; + + rdmsrl(0xc0011005, msrval); + if (msrval & BIT_64(54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + } +} + static void parse_topology_amd(struct topo_scan *tscan) { bool has_0xb = false; @@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan) void cpu_parse_topology_amd(struct topo_scan *tscan) { tscan->amd_nodes_per_pkg = 1; + topoext_fixup(tscan); parse_topology_amd(tscan); if (tscan->amd_nodes_per_pkg > 1) diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e963344b0449..53935b4d62e3 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -2,6 +2,7 @@ /* * EISA specific code */ +#include #include #include #include @@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void) { void __iomem *p; - if (xen_pv_domain() && !xen_initial_domain()) + if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return 0; p = ioremap(0x0FFFD9, 4); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4cadfd606e8e..7f0732bc0ccd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); +static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled); static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; @@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void) { u32 flags = 0; - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { flags = __this_cpu_read(apf_reason.flags); __this_cpu_write(apf_reason.flags, 0); } @@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) inc_irq_stat(irq_hv_callback_count); - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { token = __this_cpu_read(apf_reason.token); kvm_async_pf_task_wake(token); __this_cpu_write(apf_reason.token, 0); @@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR); wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); - __this_cpu_write(apf_reason.enabled, 1); + __this_cpu_write(async_pf_enabled, true); pr_debug("setup async PF for cpu %d\n", smp_processor_id()); } @@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void) static void kvm_pv_disable_apf(void) { - if (!__this_cpu_read(apf_reason.enabled)) + if (!__this_cpu_read(async_pf_enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __this_cpu_write(apf_reason.enabled, 0); + __this_cpu_write(async_pf_enabled, false); pr_debug("disable async PF for cpu %d\n", smp_processor_id()); } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 9a5b372c706f..ed163c8c8604 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx); static char *nmi_check_stall_msg[] = { /* */ -/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */ +/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */ /* | +------ cpu_is_offline(cpu) */ /* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */ /* | | | NMI handler has been invoked. */ @@ -628,22 +628,26 @@ void nmi_backtrace_stall_check(const struct cpumask *btp) nmi_seq = READ_ONCE(nsp->idt_nmi_seq); if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) { msgp = "CPU entered NMI handler function, but has not exited"; - } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) { - msgp = "CPU is handling NMIs"; - } else { - idx = ((nsp->idt_seq_snap & 0x1) << 2) | + } else if (nsp->idt_nmi_seq_snap == nmi_seq || + nsp->idt_nmi_seq_snap + 1 == nmi_seq) { + idx = ((nmi_seq & 0x1) << 2) | (cpu_is_offline(cpu) << 1) | (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls)); msgp = nmi_check_stall_msg[idx]; if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1)) modp = ", but OK because ignore_nmis was set"; - if (nmi_seq & 0x1) - msghp = " (CPU currently in NMI handler function)"; - else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) + if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) msghp = " (CPU exited one NMI handler function)"; + else if (nmi_seq & 0x1) + msghp = " (CPU currently in NMI handler function)"; + else + msghp = " (CPU was never in an NMI handler function)"; + } else { + msgp = "CPU is handling NMIs"; } - pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n", - __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies)); + pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp); + pr_alert("%s: last activity: %lu jiffies ago.\n", + __func__, j - READ_ONCE(nsp->recv_jiffies)); } } diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 319fef37d9dc..cc2c34ba7228 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -203,16 +203,6 @@ void __init probe_roms(void) unsigned char c; int i; - /* - * The ROM memory range is not part of the e820 table and is therefore not - * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted - * memory, and SNP requires encrypted memory to be validated before access. - * Do that here. - */ - snp_prep_memory(video_rom_resource.start, - ((system_rom_resource.end + 1) - video_rom_resource.start), - SNP_PAGE_STATE_PRIVATE); - /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ef206500ed6f..e125e059e2c4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -902,7 +902,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); reserve_ibft_region(); - dmi_setup(); + x86_init.resources.dmi_setup(); /* * VMware detection requires dmi to be available, so this @@ -992,6 +992,7 @@ void __init setup_arch(char **cmdline_p) * memory size. */ mem_encrypt_setup_arch(); + cc_random_init(); efi_fake_memmap(); efi_find_mirror(); diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b59b09c2f284..38ad066179d8 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); } -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) -{ - unsigned long vaddr, npages; - - vaddr = (unsigned long)__va(paddr); - npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - - if (op == SNP_PAGE_STATE_PRIVATE) - early_snp_set_memory_private(vaddr, paddr, npages); - else if (op == SNP_PAGE_STATE_SHARED) - early_snp_set_memory_shared(vaddr, paddr, npages); - else - WARN(1, "invalid memory op %d\n", op); -} - static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, unsigned long vaddr_end, int op) { @@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +/* + * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are + * enabled, as the alternative (fallback) logic for DMI probing in the legacy + * ROM region can cause a crash since this region is not pre-validated. + */ +void __init snp_dmi_setup(void) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + dmi_setup(); +} + static void dump_cpuid_table(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -2287,16 +2284,6 @@ static int __init snp_init_platform_device(void) } device_initcall(snp_init_platform_device); -void kdump_sev_callback(void) -{ - /* - * Do wbinvd() on remote CPUs when SNP is enabled in order to - * safely do SNP_SHUTDOWN on the local CPU. - */ - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - wbinvd(); -} - void sev_show_status(void) { int i; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a42830dc151b..d5dc5a92635a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -3,6 +3,7 @@ * * For licencing details see kernel-base/COPYING */ +#include #include #include #include @@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, + .dmi_setup = dmi_setup, }, .mpparse = { diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 3aaf7e86a859..0ebdd088f28b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -122,6 +122,7 @@ config KVM_AMD_SEV default y depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) + select ARCH_HAS_CC_PLATFORM help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a88bb14266b6..addc44fc7187 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,11 +3,6 @@ ccflags-y += -I $(srctree)/arch/x86/kvm ccflags-$(CONFIG_KVM_WERROR) += -Werror -ifeq ($(CONFIG_FRAME_POINTER),y) -OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y -OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y -endif - include $(srctree)/virt/kvm/Makefile.kvm kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index adba49afb5fe..77352a4abd87 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -189,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 return 0; } -static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, - const char *sig) +static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries, + int nent, const char *sig) { struct kvm_hypervisor_cpuid cpuid = {}; struct kvm_cpuid_entry2 *entry; u32 base; for_each_possible_hypervisor_cpuid_base(base) { - entry = kvm_find_cpuid_entry(vcpu, base); + entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (entry) { u32 signature[3]; @@ -217,22 +217,29 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp return cpuid; } -static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu, - struct kvm_cpuid_entry2 *entries, int nent) +static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, + const char *sig) +{ + return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent, sig); +} + +static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries, + int nent, u32 kvm_cpuid_base) +{ + return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES, + KVM_CPUID_INDEX_NOT_SIGNIFICANT); +} + +static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) { u32 base = vcpu->arch.kvm_cpuid.base; if (!base) return NULL; - return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, - KVM_CPUID_INDEX_NOT_SIGNIFICANT); -} - -static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) -{ - return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent); + return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent, base); } void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) @@ -266,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e int nent) { struct kvm_cpuid_entry2 *best; + struct kvm_hypervisor_cpuid kvm_cpuid; best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (best) { @@ -292,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent); - if (kvm_hlt_in_guest(vcpu->kvm) && best && - (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) - best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE); + if (kvm_cpuid.base) { + best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base); + if (kvm_hlt_in_guest(vcpu->kvm) && best) + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + } if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); @@ -366,6 +376,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_update_pv_runtime(vcpu); + vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 856e3037e74f..23dbb9eb277c 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -120,6 +120,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu) return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx); } +static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.is_amd_compatible; +} + +static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) +{ + return !guest_cpuid_is_amd_compatible(vcpu); +} + static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cf37586f0466..ebf41023be38 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); - if (r && lvt_type == APIC_LVTPC) + if (r && lvt_type == APIC_LVTPC && + guest_cpuid_is_intel_compatible(apic->vcpu)) kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); return r; } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 992e651540e8..db007a4dffa2 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4935,7 +4935,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, context->cpu_role.base.level, is_efer_nx(context), guest_can_use(vcpu, X86_FEATURE_GBPAGES), is_cr4_pse(context), - guest_cpuid_is_amd_or_hygon(vcpu)); + guest_cpuid_is_amd_compatible(vcpu)); } static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, @@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) * that problem is swept under the rug; KVM's CPUID API is horrific and * it's all but impossible to solve it without introducing a new API. */ - vcpu->arch.root_mmu.root_role.word = 0; - vcpu->arch.guest_mmu.root_role.word = 0; - vcpu->arch.nested_mmu.root_role.word = 0; + vcpu->arch.root_mmu.root_role.invalid = 1; + vcpu->arch.guest_mmu.root_role.invalid = 1; + vcpu->arch.nested_mmu.root_role.invalid = 1; vcpu->arch.root_mmu.cpu_role.ext.valid = 0; vcpu->arch.guest_mmu.cpu_role.ext.valid = 0; vcpu->arch.nested_mmu.cpu_role.ext.valid = 0; @@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, * by the memslot, KVM can't use a hugepage due to the * misaligned address regardless of memory attributes. */ - if (gfn >= slot->base_gfn) { + if (gfn >= slot->base_gfn && + gfn + nr_pages <= slot->base_gfn + slot->npages) { if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d078157e62aa..04c1f0957fea 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm, } } -/* - * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If - * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. - * If AD bits are not enabled, this will require clearing the writable bit on - * each SPTE. Returns true if an SPTE has been changed and the TLBs need to - * be flushed. - */ +static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp) +{ + /* + * All TDP MMU shadow pages share the same role as their root, aside + * from level, so it is valid to key off any shadow page to determine if + * write protection is needed for an entire tree. + */ + return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled(); +} + static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end) { - u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK; + const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK : + shadow_dirty_mask; struct tdp_iter iter; bool spte_set = false; @@ -1573,7 +1577,7 @@ retry: if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; - KVM_MMU_WARN_ON(kvm_ad_enabled() && + KVM_MMU_WARN_ON(dbit == shadow_dirty_mask && spte_ad_need_write_protect(iter.old_spte)); if (!(iter.old_spte & dbit)) @@ -1590,11 +1594,9 @@ retry: } /* - * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If - * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. - * If AD bits are not enabled, this will require clearing the writable bit on - * each SPTE. Returns true if an SPTE has been changed and the TLBs need to - * be flushed. + * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the + * memslot. Returns true if an SPTE has been changed and the TLBs need to be + * flushed. */ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, const struct kvm_memory_slot *slot) @@ -1610,18 +1612,11 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, return spte_set; } -/* - * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is - * set in mask, starting at gfn. The given memslot is expected to contain all - * the GFNs represented by set bits in the mask. If AD bits are enabled, - * clearing the dirty status will involve clearing the dirty bit on each SPTE - * or, if AD bits are not enabled, clearing the writable bit on each SPTE. - */ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t gfn, unsigned long mask, bool wrprot) { - u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK : - shadow_dirty_mask; + const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK : + shadow_dirty_mask; struct tdp_iter iter; lockdep_assert_held_write(&kvm->mmu_lock); @@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, if (!mask) break; - KVM_MMU_WARN_ON(kvm_ad_enabled() && + KVM_MMU_WARN_ON(dbit == shadow_dirty_mask && spte_ad_need_write_protect(iter.old_spte)); if (iter.level > PG_LEVEL_4K || @@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, } /* - * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is - * set in mask, starting at gfn. The given memslot is expected to contain all - * the GFNs represented by set bits in the mask. If AD bits are enabled, - * clearing the dirty status will involve clearing the dirty bit on each SPTE - * or, if AD bits are not enabled, clearing the writable bit on each SPTE. + * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for + * which a bit is set in mask, starting at gfn. The given memslot is expected to + * contain all the GFNs represented by set bits in the mask. */ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index c397b28e3d1b..a593b03c9aed 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) pmu->pebs_data_cfg_mask = ~0ull; bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); - if (vcpu->kvm->arch.enable_pmu) - static_call(kvm_x86_pmu_refresh)(vcpu); + if (!vcpu->kvm->arch.enable_pmu) + return; + + static_call(kvm_x86_pmu_refresh)(vcpu); + + /* + * At RESET, both Intel and AMD CPUs set all enable bits for general + * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that + * was written for v1 PMUs don't unknowingly leave GP counters disabled + * in the global controls). Emulate that behavior when refreshing the + * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL. + */ + if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters) + pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0); } void kvm_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index aadefcaa9561..2f4e155080ba 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) -#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) /* CPUID level 0x80000007 (EDX). */ @@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = { */ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) { + BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } @@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC); KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index ae0ac12382b9..759581bb2128 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -84,9 +84,10 @@ struct enc_region { }; /* Called with the sev_bitmap_lock held, or on shutdown */ -static int sev_flush_asids(int min_asid, int max_asid) +static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) { - int ret, asid, error = 0; + int ret, error = 0; + unsigned int asid; /* Check if there are any ASIDs to reclaim before performing a flush */ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); @@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(int min_asid, int max_asid) +static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) { if (sev_flush_asids(min_asid, max_asid)) return false; @@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) static int sev_asid_new(struct kvm_sev_info *sev) { - int asid, min_asid, max_asid, ret; + /* + * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. + * Note: min ASID can end up larger than the max if basic SEV support is + * effectively disabled by disallowing use of ASIDs for SEV guests. + */ + unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; + unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; + unsigned int asid; bool retry = true; + int ret; + + if (min_asid > max_asid) + return -ENOTTY; WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); @@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev) mutex_lock(&sev_bitmap_lock); - /* - * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. - * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. - */ - min_asid = sev->es_active ? 1 : min_sev_asid; - max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); if (asid > max_asid) { @@ -179,7 +186,8 @@ again: mutex_unlock(&sev_bitmap_lock); - return asid; + sev->asid = asid; + return 0; e_uncharge: sev_misc_cg_uncharge(sev); put_misc_cg(sev->misc_cg); @@ -187,7 +195,7 @@ e_uncharge: return ret; } -static int sev_get_asid(struct kvm *kvm) +static unsigned int sev_get_asid(struct kvm *kvm) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -247,21 +255,19 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_platform_init_args init_args = {0}; - int asid, ret; + int ret; if (kvm->created_vcpus) return -EINVAL; - ret = -EBUSY; if (unlikely(sev->active)) - return ret; + return -EINVAL; sev->active = true; sev->es_active = argp->id == KVM_SEV_ES_INIT; - asid = sev_asid_new(sev); - if (asid < 0) + ret = sev_asid_new(sev); + if (ret) goto e_no_asid; - sev->asid = asid; init_args.probe = false; ret = sev_platform_init(&init_args); @@ -287,8 +293,8 @@ e_no_asid: static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { + unsigned int asid = sev_get_asid(kvm); struct sev_data_activate activate; - int asid = sev_get_asid(kvm); int ret; /* activate ASID on the given handle */ @@ -428,7 +434,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, /* Avoid using vmalloc for smaller buffers. */ size = npages * sizeof(struct page *); if (size > PAGE_SIZE) - pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); + pages = __vmalloc(size, GFP_KERNEL_ACCOUNT); else pages = kmalloc(size, GFP_KERNEL_ACCOUNT); @@ -2240,8 +2246,10 @@ void __init sev_hardware_setup(void) goto out; } - sev_asid_count = max_sev_asid - min_sev_asid + 1; - WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + if (min_sev_asid <= max_sev_asid) { + sev_asid_count = max_sev_asid - min_sev_asid + 1; + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + } sev_supported = true; /* SEV-ES support requested? */ @@ -2272,7 +2280,9 @@ void __init sev_hardware_setup(void) out: if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", - sev_supported ? "enabled" : "disabled", + sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : + "unusable" : + "disabled", min_sev_asid, max_sev_asid); if (boot_cpu_has(X86_FEATURE_SEV_ES)) pr_info("SEV-ES %s (ASIDs %u - %u)\n", @@ -2320,7 +2330,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) */ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) { - int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; + unsigned int asid = sev_get_asid(vcpu->kvm); /* * Note! The address must be a kernel address, as regular page walk @@ -2638,7 +2648,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - int asid = sev_get_asid(svm->vcpu.kvm); + unsigned int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ svm->asid = asid; @@ -3174,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) unsigned long pfn; struct page *p; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d1a9f9951635..9aaf83c8d57d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1503,6 +1503,11 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } +static struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd) +{ + return page_address(sd->save_area) + 0x400; +} + static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1519,12 +1524,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * or subsequent vmload of host save area. */ vmsave(sd->save_area_pa); - if (sev_es_guest(vcpu->kvm)) { - struct sev_es_save_area *hostsa; - hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); - - sev_es_prepare_switch_to_guest(hostsa); - } + if (sev_es_guest(vcpu->kvm)) + sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd)); if (tsc_scaling) __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); @@ -4101,6 +4102,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) { + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); guest_state_enter_irqoff(); @@ -4108,7 +4110,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in amd_clear_divider(); if (sev_es_guest(vcpu->kvm)) - __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted, + sev_es_host_save_area(sd)); else __svm_vcpu_run(svm, spec_ctrl_intercepted); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7f1fbd874c45..33878efdebc8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -698,7 +698,8 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); /* vmenter.S */ -void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted, + struct sev_es_save_area *hostsa); void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); #define DEFINE_KVM_GHCB_ACCESSORS(field) \ diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 187018c424bf..a0c8eb37d3e1 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include "kvm-asm-offsets.h" @@ -67,7 +68,7 @@ "", X86_FEATURE_V_SPEC_CTRL 901: .endm -.macro RESTORE_HOST_SPEC_CTRL_BODY +.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req 900: /* Same for after vmexit. */ mov $MSR_IA32_SPEC_CTRL, %ecx @@ -76,7 +77,7 @@ * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, * if it was not intercepted during guest execution. */ - cmpb $0, (%_ASM_SP) + cmpb $0, \spec_ctrl_intercepted jnz 998f rdmsr movl %eax, SVM_spec_ctrl(%_ASM_DI) @@ -99,6 +100,7 @@ */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP + mov %_ASM_SP, %_ASM_BP #ifdef CONFIG_X86_64 push %r15 push %r14 @@ -268,7 +270,7 @@ SYM_FUNC_START(__svm_vcpu_run) RET RESTORE_GUEST_SPEC_CTRL_BODY - RESTORE_HOST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP) 10: cmpb $0, _ASM_RIP(kvm_rebooting) jne 2b @@ -290,66 +292,68 @@ SYM_FUNC_START(__svm_vcpu_run) SYM_FUNC_END(__svm_vcpu_run) +#ifdef CONFIG_KVM_AMD_SEV + + +#ifdef CONFIG_X86_64 +#define SEV_ES_GPRS_BASE 0x300 +#define SEV_ES_RBX (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE) +#define SEV_ES_RBP (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE) +#define SEV_ES_RSI (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE) +#define SEV_ES_RDI (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE) +#define SEV_ES_R12 (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE) +#define SEV_ES_R13 (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE) +#define SEV_ES_R14 (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE) +#define SEV_ES_R15 (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE) +#endif + /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode * @svm: struct vcpu_svm * * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_sev_es_vcpu_run) - push %_ASM_BP -#ifdef CONFIG_X86_64 - push %r15 - push %r14 - push %r13 - push %r12 -#else - push %edi - push %esi -#endif - push %_ASM_BX + FRAME_BEGIN /* - * Save variables needed after vmexit on the stack, in inverse - * order compared to when they are needed. + * Save non-volatile (callee-saved) registers to the host save area. + * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not + * saved on VMRUN. */ + mov %rbp, SEV_ES_RBP (%rdx) + mov %r15, SEV_ES_R15 (%rdx) + mov %r14, SEV_ES_R14 (%rdx) + mov %r13, SEV_ES_R13 (%rdx) + mov %r12, SEV_ES_R12 (%rdx) + mov %rbx, SEV_ES_RBX (%rdx) - /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ - push %_ASM_ARG2 - - /* Save @svm. */ - push %_ASM_ARG1 - -.ifnc _ASM_ARG1, _ASM_DI /* - * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX - * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + * Save volatile registers that hold arguments that are needed after + * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted). */ - mov %_ASM_ARG1, %_ASM_DI -.endif + mov %rdi, SEV_ES_RDI (%rdx) + mov %rsi, SEV_ES_RSI (%rdx) - /* Clobbers RAX, RCX, RDX. */ + /* Clobbers RAX, RCX, RDX (@hostsa). */ RESTORE_GUEST_SPEC_CTRL /* Get svm->current_vmcb->pa into RAX. */ - mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX - mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX + mov SVM_current_vmcb(%rdi), %rax + mov KVM_VMCB_pa(%rax), %rax /* Enter guest mode */ sti -1: vmrun %_ASM_AX +1: vmrun %rax 2: cli - /* Pop @svm to RDI, guest registers have been saved already. */ - pop %_ASM_DI - #ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif - /* Clobbers RAX, RCX, RDX. */ + /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */ RESTORE_HOST_SPEC_CTRL /* @@ -361,30 +365,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) */ UNTRAIN_RET_VM - /* "Pop" @spec_ctrl_intercepted. */ - pop %_ASM_BX - - pop %_ASM_BX - -#ifdef CONFIG_X86_64 - pop %r12 - pop %r13 - pop %r14 - pop %r15 -#else - pop %esi - pop %edi -#endif - pop %_ASM_BP + FRAME_END RET RESTORE_GUEST_SPEC_CTRL_BODY - RESTORE_HOST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY %sil -3: cmpb $0, _ASM_RIP(kvm_rebooting) +3: cmpb $0, kvm_rebooting(%rip) jne 2b ud2 _ASM_EXTABLE(1b, 3b) SYM_FUNC_END(__svm_sev_es_vcpu_run) +#endif /* CONFIG_KVM_AMD_SEV */ diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 88659de4d2a7..c6b4b1728006 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -735,13 +735,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit, * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, - TP_PROTO(__u64 rip, int asid, u64 address), + TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( - __field( __u64, rip ) - __field( int, asid ) - __field( __u64, address ) + __field( __u64, rip ) + __field( unsigned int, asid ) + __field( __u64, address ) ), TP_fast_assign( @@ -750,7 +750,7 @@ TRACE_EVENT(kvm_invlpga, __entry->address = address; ), - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", + TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 12ade343a17e..be40474de6e4 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -535,7 +535,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) perf_capabilities = vcpu_get_perf_capabilities(vcpu); if (cpuid_model_is_consistent(vcpu) && (perf_capabilities & PMU_CAP_LBR_FMT)) - x86_perf_get_lbr(&lbr_desc->records); + memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps)); else lbr_desc->records.nr = 0; diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 2bfbf758d061..f6986dee6f8c 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host + CLEAR_BRANCH_HISTORY_VMEXIT + /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c37a89eda90f..22411f4aff53 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -218,6 +218,8 @@ module_param(ple_window_max, uint, 0444); int __read_mostly pt_mode = PT_MODE_SYSTEM; module_param(pt_mode, int, S_IRUGO); +struct x86_pmu_lbr __ro_after_init vmx_lbr_caps; + static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); static DEFINE_MUTEX(vmx_l1d_flush_mutex); @@ -7862,10 +7864,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx_update_exception_bitmap(vcpu); } -static u64 vmx_get_perf_capabilities(void) +static __init u64 vmx_get_perf_capabilities(void) { u64 perf_cap = PMU_CAP_FW_WRITES; - struct x86_pmu_lbr lbr; u64 host_perf_cap = 0; if (!enable_pmu) @@ -7875,15 +7876,43 @@ static u64 vmx_get_perf_capabilities(void) rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) { - x86_perf_get_lbr(&lbr); - if (lbr.nr) + x86_perf_get_lbr(&vmx_lbr_caps); + + /* + * KVM requires LBR callstack support, as the overhead due to + * context switching LBRs without said support is too high. + * See intel_pmu_create_guest_lbr_event() for more info. + */ + if (!vmx_lbr_caps.has_callstack) + memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps)); + else if (vmx_lbr_caps.nr) perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; } if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; + + /* + * Disallow adaptive PEBS as it is functionally broken, can be + * used by the guest to read *host* LBRs, and can be used to + * bypass userspace event filters. To correctly and safely + * support adaptive PEBS, KVM needs to: + * + * 1. Account for the ADAPTIVE flag when (re)programming fixed + * counters. + * + * 2. Gain support from perf (or take direct control of counter + * programming) to support events without adaptive PEBS + * enabled for the hardware counter. + * + * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with + * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1. + * + * 4. Document which PMU events are effectively exposed to the + * guest via adaptive PEBS, and make adaptive PEBS mutually + * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary. + */ + perf_cap &= ~PERF_CAP_PEBS_BASELINE; } return perf_cap; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 65786dbe7d60..90f9e4434646 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -15,6 +15,7 @@ #include "vmx_ops.h" #include "../cpuid.h" #include "run_flags.h" +#include "../mmu.h" #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 @@ -109,6 +110,8 @@ struct lbr_desc { bool msr_passthrough; }; +extern struct x86_pmu_lbr vmx_lbr_caps; + /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -719,7 +722,8 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) if (!enable_ept) return true; - return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; + return allow_smaller_maxphyaddr && + cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits(); } static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47d9f03b7778..91478b769af0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) static u64 kvm_get_arch_capabilities(void) { @@ -3470,7 +3470,7 @@ static bool is_mci_status_msr(u32 msr) static bool can_set_mci_status(struct kvm_vcpu *vcpu) { /* McStatusWrEn enabled? */ - if (guest_cpuid_is_amd_or_hygon(vcpu)) + if (guest_cpuid_is_amd_compatible(vcpu)) return !!(vcpu->arch.msr_hwcr & BIT_ULL(18)); return false; diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 721b528da9ac..391059b2c6fb 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -163,6 +163,7 @@ SYM_CODE_START_NOALIGN(srso_alias_untrain_ret) lfence jmp srso_alias_return_thunk SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) .popsection .pushsection .text..__x86.rethunk_safe @@ -224,10 +225,16 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) #define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" -#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret" #else /* !CONFIG_MITIGATION_SRSO */ +/* Dummy for the alternative in CALL_UNTRAIN_RET. */ +SYM_CODE_START(srso_alias_untrain_ret) + ANNOTATE_UNRET_SAFE + ANNOTATE_NOENDBR + ret + int3 +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) #define JMP_SRSO_UNTRAIN_RET "ud2" -#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" #endif /* CONFIG_MITIGATION_SRSO */ #ifdef CONFIG_MITIGATION_UNRET_ENTRY @@ -319,9 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret) #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ - JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \ - JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS + ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) @@ -377,8 +382,15 @@ SYM_FUNC_END(call_depth_return_thunk) SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \ + defined(CONFIG_MITIGATION_SRSO) || \ + defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \ "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS +#else + ANNOTATE_UNRET_SAFE + ret +#endif int3 SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index a204a332c71f..968d7005f4a7 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - /* if this is already a gbpage, this portion is already mapped */ - if (pud_leaf(*pud)) - continue; - - /* Is using a gbpage allowed? */ - use_gbpage = info->direct_gbpages; - - /* Don't use gbpage if it maps more than the requested region. */ - /* at the begining: */ - use_gbpage &= ((addr & ~PUD_MASK) == 0); - /* ... or at the end: */ - use_gbpage &= ((next & ~PUD_MASK) == 0); - - /* Never overwrite existing mappings */ - use_gbpage &= !pud_present(*pud); - - if (use_gbpage) { + if (info->direct_gbpages) { pud_t pudval; + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 70b91de2e053..422602f6039b 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -492,6 +492,24 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ENABLED) ia32_disable(); + + /* + * Override init functions that scan the ROM region in SEV-SNP guests, + * as this memory is not pre-validated and would thus cause a crash. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.pci.init_irq = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; + + /* + * DMI setup behavior for SEV-SNP guests depends on + * efi_enabled(EFI_CONFIG_TABLES), which hasn't been + * parsed yet. snp_dmi_setup() will run after that + * parsing has happened. + */ + x86_init.resources.dmi_setup = snp_dmi_setup; + } } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 104544359d69..025fd7ea5d69 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -24,6 +24,7 @@ #include #include +#include #include "numa_internal.h" diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 0d72183b5dd0..36b603d0cdde 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a7ba8e178645..df5fac428408 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip) static int emit_rsb_call(u8 **pprog, void *func, void *ip) { OPTIMIZER_HIDE_VAR(func); - x86_call_depth_emit_accounting(pprog, func); + ip += x86_call_depth_emit_accounting(pprog, func, ip); return emit_patch(pprog, func, ip, 0xE8); } @@ -1972,20 +1972,17 @@ populate_extable: /* call */ case BPF_JMP | BPF_CALL: { - int offs; + u8 *ip = image + addrs[i - 1]; func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth); - if (!imm32) - return -EINVAL; - offs = 7 + x86_call_depth_emit_accounting(&prog, func); - } else { - if (!imm32) - return -EINVAL; - offs = x86_call_depth_emit_accounting(&prog, func); + ip += 7; } - if (emit_call(&prog, func, image + addrs[i - 1] + offs)) + if (!imm32) + return -EINVAL; + ip += x86_call_depth_emit_accounting(&prog, func, ip); + if (emit_call(&prog, func, ip)) return -EINVAL; break; } @@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im * Direct-call fentry stub, as such it needs accounting for the * __fentry__ call. */ - x86_call_depth_emit_accounting(&prog, NULL); + x86_call_depth_emit_accounting(&prog, NULL, image); } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile index 1e36502cd738..ea343fc392dc 100644 --- a/arch/x86/virt/Makefile +++ b/arch/x86/virt/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += vmx/ +obj-y += svm/ vmx/ diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index cffe1157a90a..ab0e8448bb6e 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void) u64 rmptable_size; u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; if (!amd_iommu_snp_en) - return 0; + goto nosnp; if (!probed_rmp_size) goto nosnp; @@ -225,7 +225,7 @@ skip_enable: return 0; nosnp: - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); return -ENOSYS; } @@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level) { struct rmpentry *large_entry, *entry; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return ERR_PTR(-ENODEV); entry = get_rmpentry(pfn); @@ -363,7 +363,7 @@ int psmash(u64 pfn) unsigned long paddr = pfn << PAGE_SHIFT; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; if (!pfn_valid(pfn)) @@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state) unsigned long paddr = pfn << PAGE_SHIFT; int ret, level; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; level = RMP_TO_PG_LEVEL(state->pagesize); @@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages) spin_unlock(&snp_leaked_pages_list_lock); } EXPORT_SYMBOL_GPL(snp_leak_pages); + +void kdump_sev_callback(void) +{ + /* + * Do wbinvd() on remote CPUs when SNP is enabled in order to + * safely do SNP_SHUTDOWN on the local CPU. + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + wbinvd(); +} diff --git a/block/bdev.c b/block/bdev.c index 7a5f611c3d2e..4dc94145eb53 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -583,9 +583,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder, mutex_unlock(&bdev->bd_holder_lock); bd_clear_claiming(whole, holder); mutex_unlock(&bdev_lock); - - if (hops && hops->get_holder) - hops->get_holder(holder); } /** @@ -608,7 +605,6 @@ EXPORT_SYMBOL(bd_abort_claiming); static void bd_end_claim(struct block_device *bdev, void *holder) { struct block_device *whole = bdev_whole(bdev); - const struct blk_holder_ops *hops = bdev->bd_holder_ops; bool unblock = false; /* @@ -631,9 +627,6 @@ static void bd_end_claim(struct block_device *bdev, void *holder) whole->bd_holder = NULL; mutex_unlock(&bdev_lock); - if (hops && hops->put_holder) - hops->put_holder(holder); - /* * If this was the last claim, remove holder link and unblock evpoll if * it was a write holder. @@ -652,6 +645,14 @@ static void blkdev_flush_mapping(struct block_device *bdev) bdev_write_inode(bdev); } +static void blkdev_put_whole(struct block_device *bdev) +{ + if (atomic_dec_and_test(&bdev->bd_openers)) + blkdev_flush_mapping(bdev); + if (bdev->bd_disk->fops->release) + bdev->bd_disk->fops->release(bdev->bd_disk); +} + static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode) { struct gendisk *disk = bdev->bd_disk; @@ -670,20 +671,21 @@ static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode) if (!atomic_read(&bdev->bd_openers)) set_init_blocksize(bdev); - if (test_bit(GD_NEED_PART_SCAN, &disk->state)) - bdev_disk_changed(disk, false); atomic_inc(&bdev->bd_openers); + if (test_bit(GD_NEED_PART_SCAN, &disk->state)) { + /* + * Only return scanning errors if we are called from contexts + * that explicitly want them, e.g. the BLKRRPART ioctl. + */ + ret = bdev_disk_changed(disk, false); + if (ret && (mode & BLK_OPEN_STRICT_SCAN)) { + blkdev_put_whole(bdev); + return ret; + } + } return 0; } -static void blkdev_put_whole(struct block_device *bdev) -{ - if (atomic_dec_and_test(&bdev->bd_openers)) - blkdev_flush_mapping(bdev); - if (bdev->bd_disk->fops->release) - bdev->bd_disk->fops->release(bdev->bd_disk); -} - static int blkdev_get_part(struct block_device *part, blk_mode_t mode) { struct gendisk *disk = part->bd_disk; @@ -776,17 +778,17 @@ void blkdev_put_no_open(struct block_device *bdev) static bool bdev_writes_blocked(struct block_device *bdev) { - return bdev->bd_writers == -1; + return bdev->bd_writers < 0; } static void bdev_block_writes(struct block_device *bdev) { - bdev->bd_writers = -1; + bdev->bd_writers--; } static void bdev_unblock_writes(struct block_device *bdev) { - bdev->bd_writers = 0; + bdev->bd_writers++; } static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode) @@ -813,6 +815,11 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) bdev->bd_writers++; } +static inline bool bdev_unclaimed(const struct file *bdev_file) +{ + return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host); +} + static void bdev_yield_write_access(struct file *bdev_file) { struct block_device *bdev; @@ -820,14 +827,15 @@ static void bdev_yield_write_access(struct file *bdev_file) if (bdev_allow_write_mounted) return; + if (bdev_unclaimed(bdev_file)) + return; + bdev = file_bdev(bdev_file); - /* Yield exclusive or shared write access. */ - if (bdev_file->f_mode & FMODE_WRITE) { - if (bdev_writes_blocked(bdev)) - bdev_unblock_writes(bdev); - else - bdev->bd_writers--; - } + + if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED) + bdev_unblock_writes(bdev); + else if (bdev_file->f_mode & FMODE_WRITE) + bdev->bd_writers--; } /** @@ -907,6 +915,8 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; if (bdev_nowait(bdev)) bdev_file->f_mode |= FMODE_NOWAIT; + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_file->f_mode |= FMODE_WRITE_RESTRICTED; bdev_file->f_mapping = bdev->bd_inode->i_mapping; bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); bdev_file->private_data = holder; @@ -1012,6 +1022,20 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, } EXPORT_SYMBOL(bdev_file_open_by_path); +static inline void bd_yield_claim(struct file *bdev_file) +{ + struct block_device *bdev = file_bdev(bdev_file); + void *holder = bdev_file->private_data; + + lockdep_assert_held(&bdev->bd_disk->open_mutex); + + if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder))) + return; + + if (!bdev_unclaimed(bdev_file)) + bd_end_claim(bdev, holder); +} + void bdev_release(struct file *bdev_file) { struct block_device *bdev = file_bdev(bdev_file); @@ -1036,7 +1060,7 @@ void bdev_release(struct file *bdev_file) bdev_yield_write_access(bdev_file); if (holder) - bd_end_claim(bdev, holder); + bd_yield_claim(bdev_file); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE @@ -1056,6 +1080,39 @@ put_no_open: blkdev_put_no_open(bdev); } +/** + * bdev_fput - yield claim to the block device and put the file + * @bdev_file: open block device + * + * Yield claim on the block device and put the file. Ensure that the + * block device can be reclaimed before the file is closed which is a + * deferred operation. + */ +void bdev_fput(struct file *bdev_file) +{ + if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops)) + return; + + if (bdev_file->private_data) { + struct block_device *bdev = file_bdev(bdev_file); + struct gendisk *disk = bdev->bd_disk; + + mutex_lock(&disk->open_mutex); + bdev_yield_write_access(bdev_file); + bd_yield_claim(bdev_file); + /* + * Tell release we already gave up our hold on the + * device and if write restrictions are available that + * we already gave up write access to the device. + */ + bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host); + mutex_unlock(&disk->open_mutex); + } + + fput(bdev_file); +} +EXPORT_SYMBOL(bdev_fput); + /** * lookup_bdev() - Look up a struct block_device by name. * @pathname: Name of the block device in the filesystem. diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index bdbb557feb5a..059467086b13 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1409,6 +1409,12 @@ static int blkcg_css_online(struct cgroup_subsys_state *css) return 0; } +void blkg_init_queue(struct request_queue *q) +{ + INIT_LIST_HEAD(&q->blkg_list); + mutex_init(&q->blkcg_mutex); +} + int blkcg_init_disk(struct gendisk *disk) { struct request_queue *q = disk->queue; @@ -1416,9 +1422,6 @@ int blkcg_init_disk(struct gendisk *disk) bool preloaded; int ret; - INIT_LIST_HEAD(&q->blkg_list); - mutex_init(&q->blkcg_mutex); - new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL); if (!new_blkg) return -ENOMEM; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 78b74106bf10..90b3959d88cf 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -189,6 +189,7 @@ struct blkcg_policy { extern struct blkcg blkcg_root; extern bool blkcg_debug_stats; +void blkg_init_queue(struct request_queue *q); int blkcg_init_disk(struct gendisk *disk); void blkcg_exit_disk(struct gendisk *disk); @@ -482,6 +483,7 @@ struct blkcg { }; static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } +static inline void blkg_init_queue(struct request_queue *q) { } static inline int blkcg_init_disk(struct gendisk *disk) { return 0; } static inline void blkcg_exit_disk(struct gendisk *disk) { } static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } diff --git a/block/blk-core.c b/block/blk-core.c index a16b5abdbbf5..b795ac177281 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -442,6 +442,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) init_waitqueue_head(&q->mq_freeze_wq); mutex_init(&q->mq_freeze_lock); + blkg_init_queue(q); + /* * Init percpu_ref in atomic mode so that it's faster to shutdown. * See blk_register_queue() for details. @@ -1195,6 +1197,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule) if (unlikely(!rq_list_empty(plug->cached_rq))) blk_mq_free_plug_rqs(plug); + plug->cur_ktime = 0; current->flags &= ~PF_BLOCK_TS; } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 9a85bfbbc45a..690ca99dfaca 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1347,7 +1347,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct blkcg_gq *blkg = iocg_to_blkg(iocg); - u64 tdelta, delay, new_delay; + u64 tdelta, delay, new_delay, shift; s64 vover, vover_pct; u32 hwa; @@ -1362,8 +1362,9 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) /* calculate the current delay in effect - 1/2 every second */ tdelta = now->now - iocg->delay_at; - if (iocg->delay) - delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC); + shift = div64_u64(tdelta, USEC_PER_SEC); + if (iocg->delay && shift < BITS_PER_LONG) + delay = iocg->delay >> shift; else delay = 0; @@ -1438,8 +1439,11 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay, lockdep_assert_held(&iocg->ioc->lock); lockdep_assert_held(&iocg->waitq.lock); - /* make sure that nobody messed with @iocg */ - WARN_ON_ONCE(list_empty(&iocg->active_list)); + /* + * make sure that nobody messed with @iocg. Check iocg->pd.online + * to avoid warn when removing blkcg or disk. + */ + WARN_ON_ONCE(list_empty(&iocg->active_list) && iocg->pd.online); WARN_ON_ONCE(iocg->inuse > 1); iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt); diff --git a/block/blk-merge.c b/block/blk-merge.c index 2a06fd33039d..4e3483a16b75 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -726,7 +726,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, * which can be mixed are set in each bio and mark @rq as mixed * merged. */ -void blk_rq_set_mixed_merge(struct request *rq) +static void blk_rq_set_mixed_merge(struct request *rq) { blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK; struct bio *bio; diff --git a/block/blk-mq.c b/block/blk-mq.c index 555ada922cf0..32afb87efbd0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -770,16 +770,11 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. - * For such case, force the completed nbytes to be equal to - * the BIO size so that bio_advance() sets the BIO remaining - * size to 0 and we end up calling bio_endio() before returning. */ - if (bio->bi_iter.bi_size != nbytes) { + if (bio->bi_iter.bi_size != nbytes) bio->bi_status = BLK_STS_IOERR; - nbytes = bio->bi_iter.bi_size; - } else { + else bio->bi_iter.bi_sector = rq->__sector; - } } bio_advance(bio, nbytes); diff --git a/block/blk-settings.c b/block/blk-settings.c index 3c7d8d638ab5..d2731843f2fc 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -146,8 +146,7 @@ static int blk_validate_limits(struct queue_limits *lim) max_hw_sectors = min_not_zero(lim->max_hw_sectors, lim->max_dev_sectors); if (lim->max_user_sectors) { - if (lim->max_user_sectors > max_hw_sectors || - lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) + if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) return -EINVAL; lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors); } else { @@ -183,17 +182,13 @@ static int blk_validate_limits(struct queue_limits *lim) return -EINVAL; /* - * Devices that require a virtual boundary do not support scatter/gather - * I/O natively, but instead require a descriptor list entry for each - * page (which might not be identical to the Linux PAGE_SIZE). Because - * of that they are not limited by our notion of "segment size". + * Stacking device may have both virtual boundary and max segment + * size limit, so allow this setting now, and long-term the two + * might need to move out of stacking limits since we have immutable + * bvec and lower layer bio splitting is supposed to handle the two + * correctly. */ - if (lim->virt_boundary_mask) { - if (WARN_ON_ONCE(lim->max_segment_size && - lim->max_segment_size != UINT_MAX)) - return -EINVAL; - lim->max_segment_size = UINT_MAX; - } else { + if (!lim->virt_boundary_mask) { /* * The maximum segment size has an odd historic 64k default that * drivers probably should override. Just like the I/O size we diff --git a/block/blk.h b/block/blk.h index 5cac4e29ae17..d9f584984bc4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -339,7 +339,6 @@ int ll_back_merge_fn(struct request *req, struct bio *bio, bool blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next); unsigned int blk_recalc_rq_segments(struct request *rq); -void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); diff --git a/block/ioctl.c b/block/ioctl.c index 0c76137adcaa..f505f9c341eb 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -96,7 +96,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, unsigned long arg) { uint64_t range[2]; - uint64_t start, len; + uint64_t start, len, end; struct inode *inode = bdev->bd_inode; int err; @@ -117,7 +117,8 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, if (len & 511) return -EINVAL; - if (start + len > bdev_nr_bytes(bdev)) + if (check_add_overflow(start, len, &end) || + end > bdev_nr_bytes(bdev)) return -EINVAL; filemap_invalidate_lock(inode->i_mapping); @@ -562,7 +563,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, return -EACCES; if (bdev_is_partition(bdev)) return -EINVAL; - return disk_scan_partitions(bdev->bd_disk, mode); + return disk_scan_partitions(bdev->bd_disk, + mode | BLK_OPEN_STRICT_SCAN); case BLKTRACESTART: case BLKTRACESTOP: case BLKTRACETEARDOWN: diff --git a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c index 05402ef8964e..8aecbe4637f3 100644 --- a/crypto/asymmetric_keys/mscode_parser.c +++ b/crypto/asymmetric_keys/mscode_parser.c @@ -75,6 +75,9 @@ int mscode_note_digest_algo(void *context, size_t hdrlen, oid = look_up_OID(value, vlen); switch (oid) { + case OID_sha1: + ctx->digest_algo = "sha1"; + break; case OID_sha256: ctx->digest_algo = "sha256"; break; diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index 5b08c50722d0..231ad7b3789d 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -227,6 +227,9 @@ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen, struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { + case OID_sha1: + ctx->sinfo->sig->hash_algo = "sha1"; + break; case OID_sha256: ctx->sinfo->sig->hash_algo = "sha256"; break; @@ -278,6 +281,7 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen, ctx->sinfo->sig->pkey_algo = "rsa"; ctx->sinfo->sig->encoding = "pkcs1"; break; + case OID_id_ecdsa_with_sha1: case OID_id_ecdsa_with_sha224: case OID_id_ecdsa_with_sha256: case OID_id_ecdsa_with_sha384: diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index e5f22691febd..e314fd57e6f8 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -115,7 +115,8 @@ software_key_determine_akcipher(const struct public_key *pkey, */ if (!hash_algo) return -EINVAL; - if (strcmp(hash_algo, "sha224") != 0 && + if (strcmp(hash_algo, "sha1") != 0 && + strcmp(hash_algo, "sha224") != 0 && strcmp(hash_algo, "sha256") != 0 && strcmp(hash_algo, "sha384") != 0 && strcmp(hash_algo, "sha512") != 0 && diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c index 398983be77e8..2deff81f8af5 100644 --- a/crypto/asymmetric_keys/signature.c +++ b/crypto/asymmetric_keys/signature.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(decrypt_blob); * Sign the specified data blob using the private key specified by params->key. * The signature is wrapped in an encoding if params->encoding is specified * (eg. "pkcs1"). If the encoding needs to know the digest type, this can be - * passed through params->hash_algo (eg. "sha512"). + * passed through params->hash_algo (eg. "sha1"). * * Returns the length of the data placed in the signature buffer or an error. */ diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 487204d39426..bb0bffa271b5 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -198,6 +198,10 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, default: return -ENOPKG; /* Unsupported combination */ + case OID_sha1WithRSAEncryption: + ctx->cert->sig->hash_algo = "sha1"; + goto rsa_pkcs1; + case OID_sha256WithRSAEncryption: ctx->cert->sig->hash_algo = "sha256"; goto rsa_pkcs1; @@ -214,6 +218,10 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, ctx->cert->sig->hash_algo = "sha224"; goto rsa_pkcs1; + case OID_id_ecdsa_with_sha1: + ctx->cert->sig->hash_algo = "sha1"; + goto ecdsa; + case OID_id_rsassa_pkcs1_v1_5_with_sha3_256: ctx->cert->sig->hash_algo = "sha3-256"; goto rsa_pkcs1; diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 986f331a5fc2..12e1c892f366 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -653,6 +653,30 @@ static const struct akcipher_testvec rsa_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = { { .key = + "\x04\xf7\x46\xf8\x2f\x15\xf6\x22\x8e\xd7\x57\x4f\xcc\xe7\xbb\xc1" + "\xd4\x09\x73\xcf\xea\xd0\x15\x07\x3d\xa5\x8a\x8a\x95\x43\xe4\x68" + "\xea\xc6\x25\xc1\xc1\x01\x25\x4c\x7e\xc3\x3c\xa6\x04\x0a\xe7\x08" + "\x98", + .key_len = 49, + .params = + "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48" + "\xce\x3d\x03\x01\x01", + .param_len = 21, + .m = + "\xcd\xb9\xd2\x1c\xb7\x6f\xcd\x44\xb3\xfd\x63\xea\xa3\x66\x7f\xae" + "\x63\x85\xe7\x82", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x35\x02\x19\x00\xba\xe5\x93\x83\x6e\xb6\x3b\x63\xa0\x27\x91" + "\xc6\xf6\x7f\xc3\x09\xad\x59\xad\x88\x27\xd6\x92\x6b\x02\x18\x10" + "\x68\x01\x9d\xba\xce\x83\x08\xef\x95\x52\x7b\xa0\x0f\xe4\x18\x86" + "\x80\x6f\xa5\x79\x77\xda\xd0", + .c_size = 55, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { + .key = "\x04\xb6\x4b\xb1\xd1\xac\xba\x24\x8f\x65\xb2\x60\x00\x90\xbf\xbd" "\x78\x05\x73\xe9\x79\x1d\x6f\x7c\x0b\xd2\xc3\x93\xa7\x28\xe1\x75" "\xf7\xd5\x95\x1d\x28\x10\xc0\x75\x50\x5c\x1a\x4f\x3f\x8f\xa5\xee" @@ -756,6 +780,32 @@ static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = { { .key = + "\x04\xb9\x7b\xbb\xd7\x17\x64\xd2\x7e\xfc\x81\x5d\x87\x06\x83\x41" + "\x22\xd6\x9a\xaa\x87\x17\xec\x4f\x63\x55\x2f\x94\xba\xdd\x83\xe9" + "\x34\x4b\xf3\xe9\x91\x13\x50\xb6\xcb\xca\x62\x08\xe7\x3b\x09\xdc" + "\xc3\x63\x4b\x2d\xb9\x73\x53\xe4\x45\xe6\x7c\xad\xe7\x6b\xb0\xe8" + "\xaf", + .key_len = 65, + .params = + "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48" + "\xce\x3d\x03\x01\x07", + .param_len = 21, + .m = + "\xc2\x2b\x5f\x91\x78\x34\x26\x09\x42\x8d\x6f\x51\xb2\xc5\xaf\x4c" + "\x0b\xde\x6a\x42", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x46\x02\x21\x00\xf9\x25\xce\x9f\x3a\xa6\x35\x81\xcf\xd4\xe7" + "\xb7\xf0\x82\x56\x41\xf7\xd4\xad\x8d\x94\x5a\x69\x89\xee\xca\x6a" + "\x52\x0e\x48\x4d\xcc\x02\x21\x00\xd7\xe4\xef\x52\x66\xd3\x5b\x9d" + "\x8a\xfa\x54\x93\x29\xa7\x70\x86\xf1\x03\x03\xf3\x3b\xe2\x73\xf7" + "\xfb\x9d\x8b\xde\xd4\x8d\x6f\xad", + .c_size = 72, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { + .key = "\x04\x8b\x6d\xc0\x33\x8e\x2d\x8b\x67\xf5\xeb\xc4\x7f\xa0\xf5\xd9" "\x7b\x03\xa5\x78\x9a\xb5\xea\x14\xe4\x23\xd0\xaf\xd7\x0e\x2e\xa0" "\xc9\x8b\xdb\x95\xf8\xb3\xaf\xac\x00\x2c\x2c\x1f\x7a\xfd\x95\x88" @@ -866,6 +916,36 @@ static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p384_tv_template[] = { { + .key = /* secp384r1(sha1) */ + "\x04\x89\x25\xf3\x97\x88\xcb\xb0\x78\xc5\x72\x9a\x14\x6e\x7a\xb1" + "\x5a\xa5\x24\xf1\x95\x06\x9e\x28\xfb\xc4\xb9\xbe\x5a\x0d\xd9\x9f" + "\xf3\xd1\x4d\x2d\x07\x99\xbd\xda\xa7\x66\xec\xbb\xea\xba\x79\x42" + "\xc9\x34\x89\x6a\xe7\x0b\xc3\xf2\xfe\x32\x30\xbe\xba\xf9\xdf\x7e" + "\x4b\x6a\x07\x8e\x26\x66\x3f\x1d\xec\xa2\x57\x91\x51\xdd\x17\x0e" + "\x0b\x25\xd6\x80\x5c\x3b\xe6\x1a\x98\x48\x91\x45\x7a\x73\xb0\xc3" + "\xf1", + .key_len = 97, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x22", + .param_len = 18, + .m = + "\x12\x55\x28\xf0\x77\xd5\xb6\x21\x71\x32\x48\xcd\x28\xa8\x25\x22" + "\x3a\x69\xc1\x93", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x66\x02\x31\x00\xf5\x0f\x24\x4c\x07\x93\x6f\x21\x57\x55\x07" + "\x20\x43\x30\xde\xa0\x8d\x26\x8e\xae\x63\x3f\xbc\x20\x3a\xc6\xf1" + "\x32\x3c\xce\x70\x2b\x78\xf1\x4c\x26\xe6\x5b\x86\xcf\xec\x7c\x7e" + "\xd0\x87\xd7\xd7\x6e\x02\x31\x00\xcd\xbb\x7e\x81\x5d\x8f\x63\xc0" + "\x5f\x63\xb1\xbe\x5e\x4c\x0e\xa1\xdf\x28\x8c\x1b\xfa\xf9\x95\x88" + "\x74\xa0\x0f\xbf\xaf\xc3\x36\x76\x4a\xa1\x59\xf1\x1c\xa4\x58\x26" + "\x79\x12\x2a\xb7\xc5\x15\x92\xc5", + .c_size = 104, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { .key = /* secp384r1(sha224) */ "\x04\x69\x6c\xcf\x62\xee\xd0\x0d\xe5\xb5\x2f\x70\x54\xcf\x26\xa0" "\xd9\x98\x8d\x92\x2a\xab\x9b\x11\xcb\x48\x18\xa1\xa9\x0d\xd5\x18" diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 39f6d1b98fd6..51d3f1a55d02 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include @@ -131,22 +131,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param return 0; } -static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate) -{ - int ret; - - ret = ivpu_rpm_get_if_active(vdev); - if (ret < 0) - return ret; - - *clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0; - - if (ret) - ivpu_rpm_put(vdev); - - return 0; -} - static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; @@ -170,7 +154,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->platform; break; case DRM_IVPU_PARAM_CORE_CLOCK_RATE: - ret = ivpu_get_core_clock_rate(vdev, &args->value); + args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); break; case DRM_IVPU_PARAM_NUM_CONTEXTS: args->value = ivpu_get_context_count(vdev); @@ -387,12 +371,15 @@ int ivpu_shutdown(struct ivpu_device *vdev) { int ret; - ivpu_prepare_for_reset(vdev); + /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */ + pci_save_state(to_pci_dev(vdev->drm.dev)); ret = ivpu_hw_power_down(vdev); if (ret) ivpu_warn(vdev, "Failed to power down HW: %d\n", ret); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + return ret; } @@ -530,7 +517,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID; vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID; atomic64_set(&vdev->unique_id_counter, 0); - xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); + xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1); lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); @@ -560,11 +547,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev) /* Power up early so the rest of init code can access VPU registers */ ret = ivpu_hw_power_up(vdev); if (ret) - goto err_power_down; + goto err_shutdown; ret = ivpu_mmu_global_context_init(vdev); if (ret) - goto err_power_down; + goto err_shutdown; ret = ivpu_mmu_init(vdev); if (ret) @@ -601,10 +588,8 @@ err_mmu_rctx_fini: ivpu_mmu_reserved_context_fini(vdev); err_mmu_gctx_fini: ivpu_mmu_global_context_fini(vdev); -err_power_down: - ivpu_hw_power_down(vdev); - if (IVPU_WA(d3hot_after_power_off)) - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); +err_shutdown: + ivpu_shutdown(vdev); err_xa_destroy: xa_destroy(&vdev->db_xa); xa_destroy(&vdev->submitted_jobs_xa); @@ -628,9 +613,8 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev) static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_pm_disable(vdev); + ivpu_prepare_for_reset(vdev); ivpu_shutdown(vdev); - if (IVPU_WA(d3hot_after_power_off)) - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); ivpu_jobs_abort_all(vdev); ivpu_job_done_consumer_fini(vdev); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 7be0500d9bb8..bb4374d0eaec 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_DRV_H__ @@ -90,7 +90,6 @@ struct ivpu_wa_table { bool punit_disabled; bool clear_runtime_mem; - bool d3hot_after_power_off; bool interrupt_clear_with_0; bool disable_clock_relinquish; bool disable_d0i3_msg; diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index b2909168a0a6..094c659d2800 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -21,6 +21,7 @@ struct ivpu_hw_ops { u32 (*profiling_freq_get)(struct ivpu_device *vdev); void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable); u32 (*reg_pll_freq_get)(struct ivpu_device *vdev); + u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio); u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev); u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev); u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev); @@ -130,6 +131,11 @@ static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev) return vdev->hw->ops->reg_pll_freq_get(vdev); }; +static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) +{ + return vdev->hw->ops->ratio_to_freq(vdev, ratio); +} + static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev) { return vdev->hw->ops->reg_telemetry_offset_get(vdev); diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 9a0c9498baba..bd25e2d9fb0f 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include "ivpu_drv.h" @@ -75,7 +75,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) { vdev->wa.punit_disabled = false; vdev->wa.clear_runtime_mem = false; - vdev->wa.d3hot_after_power_off = true; REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK); if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) { @@ -86,7 +85,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) IVPU_PRINT_WA(punit_disabled); IVPU_PRINT_WA(clear_runtime_mem); - IVPU_PRINT_WA(d3hot_after_power_off); IVPU_PRINT_WA(interrupt_clear_with_0); } @@ -805,12 +803,12 @@ static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool ena /* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */ } -static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config) +static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) { u32 pll_clock = PLL_REF_CLK_FREQ * ratio; u32 cpu_clock; - if ((config & 0xff) == PLL_RATIO_4_3) + if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3) cpu_clock = pll_clock * 2 / 4; else cpu_clock = pll_clock * 2 / 5; @@ -829,7 +827,7 @@ static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev) if (!ivpu_is_silicon(vdev)) return PLL_SIMULATION_FREQ; - return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config); + return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio); } static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev) @@ -1052,6 +1050,7 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = { .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get, .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive, .reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get, + .ratio_to_freq = ivpu_hw_37xx_ratio_to_freq, .reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get, .reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get, .reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get, diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index e4eddbf5d11c..b0b88d4c8926 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -980,6 +980,11 @@ static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev) return PLL_RATIO_TO_FREQ(pll_curr_ratio); } +static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) +{ + return PLL_RATIO_TO_FREQ(ratio); +} + static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev) { return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET); @@ -1230,6 +1235,7 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = { .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get, .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive, .reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get, + .ratio_to_freq = ivpu_hw_40xx_ratio_to_freq, .reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get, .reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get, .reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get, diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 04ac4b9840fb..56ff067f63e2 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include @@ -501,7 +501,11 @@ int ivpu_ipc_init(struct ivpu_device *vdev) spin_lock_init(&ipc->cons_lock); INIT_LIST_HEAD(&ipc->cons_list); INIT_LIST_HEAD(&ipc->cb_msg_list); - drmm_mutex_init(&vdev->drm, &ipc->lock); + ret = drmm_mutex_init(&vdev->drm, &ipc->lock); + if (ret) { + ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret); + goto err_free_rx; + } ivpu_ipc_reset(vdev); return 0; diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 91bd640655ab..2e46b322c450 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -278,7 +278,7 @@ static const char *ivpu_mmu_event_to_str(u32 cmd) case IVPU_MMU_EVT_F_VMS_FETCH: return "Fetch of VMS caused external abort"; default: - return "Unknown CMDQ command"; + return "Unknown event"; } } @@ -286,15 +286,15 @@ static const char *ivpu_mmu_cmdq_err_to_str(u32 err) { switch (err) { case IVPU_MMU_CERROR_NONE: - return "No CMDQ Error"; + return "No error"; case IVPU_MMU_CERROR_ILL: return "Illegal command"; case IVPU_MMU_CERROR_ABT: - return "External abort on CMDQ read"; + return "External abort on command queue read"; case IVPU_MMU_CERROR_ATC_INV_SYNC: return "Sync failed to complete ATS invalidation"; default: - return "Unknown CMDQ Error"; + return "Unknown error"; } } diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 7cce1c928a7f..4f5ea466731f 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include @@ -58,14 +58,11 @@ static int ivpu_suspend(struct ivpu_device *vdev) { int ret; - /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */ - pci_save_state(to_pci_dev(vdev->drm.dev)); + ivpu_prepare_for_reset(vdev); ret = ivpu_shutdown(vdev); if (ret) - ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret); - - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret); return ret; } @@ -74,10 +71,10 @@ static int ivpu_resume(struct ivpu_device *vdev) { int ret; - pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); - pci_restore_state(to_pci_dev(vdev->drm.dev)); - retry: + pci_restore_state(to_pci_dev(vdev->drm.dev)); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); + ret = ivpu_hw_power_up(vdev); if (ret) { ivpu_err(vdev, "Failed to power up HW: %d\n", ret); @@ -100,6 +97,7 @@ err_mmu_disable: ivpu_mmu_disable(vdev); err_power_down: ivpu_hw_power_down(vdev); + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); if (!ivpu_fw_is_cold_boot(vdev)) { ivpu_pm_prepare_cold_boot(vdev); diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index 1fbc9b921c4f..736c2eb8c0f3 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -574,7 +574,7 @@ static u_long get_word(struct vc_data *vc) } attr_ch = get_char(vc, (u_short *)tmp_pos, &spk_attr); buf[cnt++] = attr_ch; - while (tmpx < vc->vc_cols - 1) { + while (tmpx < vc->vc_cols - 1 && cnt < sizeof(buf) - 1) { tmp_pos += 2; tmpx++; ch = get_char(vc, (u_short *)tmp_pos, &temp); diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c index b91155ea9c34..c9131259f717 100644 --- a/drivers/acpi/acpica/dbnames.c +++ b/drivers/acpi/acpica/dbnames.c @@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle, ACPI_FREE(buffer.pointer); buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); - + status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) { + acpi_os_printf("Could Not evaluate object %p\n", + obj_handle); + return (AE_OK); + } /* * Since this is a field unit, surround the output in braces */ diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index 66e7f529e92f..01faca3a238a 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -851,7 +851,7 @@ err_put_table: return rc; } -static void __exit einj_remove(struct platform_device *pdev) +static void einj_remove(struct platform_device *pdev) { struct apei_exec_context ctx; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 7c157bf92695..d1464324de95 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1843,7 +1843,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev) if (dep->honor_dep) adev->flags.honor_deps = 1; - adev->dep_unmet++; + if (!dep->met) + adev->dep_unmet++; } } } diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 302dce0b2b50..d67881b50bca 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -662,14 +662,15 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz, { int result; - tz->thermal_zone = thermal_zone_device_register_with_trips("acpitz", - trip_table, - trip_count, - tz, - &acpi_thermal_zone_ops, - NULL, - passive_delay, - tz->polling_frequency * 100); + if (trip_count) + tz->thermal_zone = thermal_zone_device_register_with_trips( + "acpitz", trip_table, trip_count, tz, + &acpi_thermal_zone_ops, NULL, passive_delay, + tz->polling_frequency * 100); + else + tz->thermal_zone = thermal_tripless_zone_device_register( + "acpitz", tz, &acpi_thermal_zone_ops, NULL); + if (IS_ERR(tz->thermal_zone)) return PTR_ERR(tz->thermal_zone); @@ -901,11 +902,8 @@ static int acpi_thermal_add(struct acpi_device *device) trip++; } - if (trip == trip_table) { + if (trip == trip_table) pr_warn(FW_BUG "No valid trip points!\n"); - result = -ENODEV; - goto free_memory; - } result = acpi_thermal_register_thermal_zone(tz, trip_table, trip - trip_table, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index bad28cf42010..dd6923d37931 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1708,8 +1708,10 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr)) + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) return 0; + if (u) { if (copy_from_user(object, u + offset, read_size)) return 0; diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 562302e2e57c..6548f10e61d9 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -666,6 +666,87 @@ static int mobile_lpm_policy = -1; module_param(mobile_lpm_policy, int, 0644); MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); +static char *ahci_mask_port_map; +module_param_named(mask_port_map, ahci_mask_port_map, charp, 0444); +MODULE_PARM_DESC(mask_port_map, + "32-bits port map masks to ignore controllers ports. " + "Valid values are: " + "\"\" to apply the same mask to all AHCI controller " + "devices, and \"=,=,...\" to " + "specify different masks for the controllers specified, " + "where is the PCI ID of an AHCI controller in the " + "form \"domain:bus:dev.func\""); + +static void ahci_apply_port_map_mask(struct device *dev, + struct ahci_host_priv *hpriv, char *mask_s) +{ + unsigned int mask; + + if (kstrtouint(mask_s, 0, &mask)) { + dev_err(dev, "Invalid port map mask\n"); + return; + } + + hpriv->mask_port_map = mask; +} + +static void ahci_get_port_map_mask(struct device *dev, + struct ahci_host_priv *hpriv) +{ + char *param, *end, *str, *mask_s; + char *name; + + if (!strlen(ahci_mask_port_map)) + return; + + str = kstrdup(ahci_mask_port_map, GFP_KERNEL); + if (!str) + return; + + /* Handle single mask case */ + if (!strchr(str, '=')) { + ahci_apply_port_map_mask(dev, hpriv, str); + goto free; + } + + /* + * Mask list case: parse the parameter to apply the mask only if + * the device name matches. + */ + param = str; + end = param + strlen(param); + while (param && param < end && *param) { + name = param; + param = strchr(name, '='); + if (!param) + break; + + *param = '\0'; + param++; + if (param >= end) + break; + + if (strcmp(dev_name(dev), name) != 0) { + param = strchr(param, ','); + if (param) + param++; + continue; + } + + mask_s = param; + param = strchr(mask_s, ','); + if (param) { + *param = '\0'; + param++; + } + + ahci_apply_port_map_mask(dev, hpriv, mask_s); + } + +free: + kfree(str); +} + static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { @@ -688,6 +769,10 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, "Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n"); } + /* Handle port map masks passed as module parameter. */ + if (ahci_mask_port_map) + ahci_get_port_map_mask(&pdev->dev, hpriv); + ahci_save_initial_config(&pdev->dev, hpriv); } diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index d4a626f87963..79a8b0aa37bf 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -30,7 +30,6 @@ #define ST_AHCI_OOBR_CIMAX_SHIFT 0 struct st_ahci_drv_data { - struct platform_device *ahci; struct reset_control *pwr; struct reset_control *sw_rst; struct reset_control *pwr_rst; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index be3412cdb22e..c449d60d9bb9 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2539,7 +2539,7 @@ static void ata_dev_config_cdl(struct ata_device *dev) bool cdl_enabled; u64 val; - if (ata_id_major_version(dev->id) < 12) + if (ata_id_major_version(dev->id) < 11) goto not_supported; if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) || diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b0d6e69c4a5b..214b935c2ced 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_ncq_enabled |= 1 << devno; /* If we are resuming, wake up the device */ - if (ap->pflags & ATA_PFLAG_RESUMING) + if (ap->pflags & ATA_PFLAG_RESUMING) { + dev->flags |= ATA_DFLAG_RESUMING; ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; + } } } @@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, return 0; err: + dev->flags &= ~ATA_DFLAG_RESUMING; *r_failed_dev = dev; return rc; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 0a0f483124c3..e954976891a9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool do_resume; int ret = 0; mutex_lock(&ap->scsi_scan_mutex); @@ -4744,25 +4745,34 @@ void ata_scsi_dev_rescan(struct work_struct *work) * bail out. */ if (ap->pflags & ATA_PFLAG_SUSPENDED) - goto unlock; + goto unlock_ap; if (!sdev) continue; if (scsi_device_get(sdev)) continue; + do_resume = dev->flags & ATA_DFLAG_RESUMING; + spin_unlock_irqrestore(ap->lock, flags); + if (do_resume) { + ret = scsi_resume_device(sdev); + if (ret == -EWOULDBLOCK) + goto unlock_scan; + dev->flags &= ~ATA_DFLAG_RESUMING; + } ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); if (ret) - goto unlock; + goto unlock_ap; } } -unlock: +unlock_ap: spin_unlock_irqrestore(ap->lock, flags); +unlock_scan: mutex_unlock(&ap->scsi_scan_mutex); /* Reschedule with a delay if scsi_rescan_device() returned an error */ diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 4ac854f6b057..88b2e9817f49 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -1371,9 +1371,6 @@ static struct pci_driver pata_macio_pci_driver = { .suspend = pata_macio_pci_suspend, .resume = pata_macio_pci_resume, #endif - .driver = { - .owner = THIS_MODULE, - }, }; MODULE_DEVICE_TABLE(pci, pata_macio_pci_match); diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c index 400b22ee99c3..4c270999ba3c 100644 --- a/drivers/ata/sata_gemini.c +++ b/drivers/ata/sata_gemini.c @@ -200,7 +200,10 @@ int gemini_sata_start_bridge(struct sata_gemini *sg, unsigned int bridge) pclk = sg->sata0_pclk; else pclk = sg->sata1_pclk; - clk_enable(pclk); + ret = clk_enable(pclk); + if (ret) + return ret; + msleep(10); /* Do not keep clocking a bridge that is not online */ diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index e82786c63fbd..9bec0aee92e0 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = { }, }; -static const struct pci_device_id mv_pci_tbl[] = { - { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, - { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, - /* RocketRAID 1720/174x have different identifiers */ - { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, - - { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, - { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, - { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, - - { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, - - /* Adaptec 1430SA */ - { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, - - /* Marvell 7042 support */ - { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, - - /* Highpoint RocketRAID PCIe series */ - { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, - { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, - - { } /* terminate list */ -}; - static const struct mv_hw_ops mv5xxx_ops = { .phy_errata = mv5_phy_errata, .enable_leds = mv5_enable_leds, @@ -4303,6 +4272,36 @@ static int mv_pci_init_one(struct pci_dev *pdev, static int mv_pci_device_resume(struct pci_dev *pdev); #endif +static const struct pci_device_id mv_pci_tbl[] = { + { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, + { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, + /* RocketRAID 1720/174x have different identifiers */ + { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, + + { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, + { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, + { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, + + { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, + + /* Adaptec 1430SA */ + { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, + + /* Marvell 7042 support */ + { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, + + /* Highpoint RocketRAID PCIe series */ + { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, + { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, + + { } /* terminate list */ +}; static struct pci_driver mv_pci_driver = { .name = DRV_NAME, @@ -4315,6 +4314,7 @@ static struct pci_driver mv_pci_driver = { #endif }; +MODULE_DEVICE_TABLE(pci, mv_pci_tbl); /** * mv_print_info - Dump key info to kernel log for perusal. @@ -4487,7 +4487,6 @@ static void __exit mv_exit(void) MODULE_AUTHOR("Brett Russ"); MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers"); MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, mv_pci_tbl); MODULE_VERSION(DRV_VERSION); MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index b51d7a9d0d90..a482741eb181 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, offset -= (idx * window_size); idx++; - dist = ((long) (window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_fromio(psource, dimm_mmio + offset / 4, dist); psource += dist; @@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, readl(mmio + PDC_DIMM_WINDOW_CTLR); offset -= (idx * window_size); idx++; - dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_toio(dimm_mmio + offset / 4, psource, dist); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); diff --git a/drivers/base/core.c b/drivers/base/core.c index 78dfa74ee18b..4d51928c4088 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; +static struct workqueue_struct *device_link_wq; /** * __fwnode_link_add - Create a link between two fwnode_handles. @@ -533,12 +534,26 @@ static void devlink_dev_release(struct device *dev) /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or - * supplier devices get deleted when it runs, so put it into the "long" - * workqueue. + * supplier devices get deleted when it runs, so put it into the + * dedicated workqueue. */ - queue_work(system_long_wq, &link->rm_work); + queue_work(device_link_wq, &link->rm_work); } +/** + * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate + */ +void device_link_wait_removal(void) +{ + /* + * devlink removal jobs are queued in the dedicated work queue. + * To be sure that all removal jobs are terminated, ensure that any + * scheduled work has run to completion. + */ + flush_workqueue(device_link_wq); +} +EXPORT_SYMBOL_GPL(device_link_wait_removal); + static struct class devlink_class = { .name = "devlink", .dev_groups = devlink_groups, @@ -4156,9 +4171,14 @@ int __init devices_init(void) sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; + device_link_wq = alloc_workqueue("device_link_wq", 0, 0); + if (!device_link_wq) + goto wq_err; return 0; + wq_err: + kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c index 41edd6a430eb..55999a50ccc0 100644 --- a/drivers/base/regmap/regcache-maple.c +++ b/drivers/base/regmap/regcache-maple.c @@ -112,7 +112,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, unsigned long *entry, *lower, *upper; unsigned long lower_index, lower_last; unsigned long upper_index, upper_last; - int ret; + int ret = 0; lower = NULL; upper = NULL; @@ -145,7 +145,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, upper_index = max + 1; upper_last = mas.last; - upper = kmemdup(&entry[max + 1], + upper = kmemdup(&entry[max - mas.index + 1], ((mas.last - max) * sizeof(unsigned long)), map->alloc_flags); @@ -244,7 +244,7 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min, unsigned long lmin = min; unsigned long lmax = max; unsigned int r, v, sync_start; - int ret; + int ret = 0; bool sync_needed = false; map->cache_bypass = true; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 71c39bcd872c..ed33cf7192d2 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1965,10 +1965,10 @@ static int null_add_dev(struct nullb_device *dev) out_ida_free: ida_free(&nullb_indexes, nullb->index); -out_cleanup_zone: - null_free_zoned_dev(dev); out_cleanup_disk: put_disk(nullb->disk); +out_cleanup_zone: + null_free_zoned_dev(dev); out_cleanup_tags: if (nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index b40b32fa7f1c..19cfc342fc7b 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -826,11 +826,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) { + bdaddr_t bdaddr_swapped; struct sk_buff *skb; int err; - skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr, - HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + baswap(&bdaddr_swapped, bdaddr); + + skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, + &bdaddr_swapped, HCI_EV_VENDOR, + HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 8a60ad7acd70..ecbc52eaf101 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,7 +7,6 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -226,6 +225,7 @@ struct qca_serdev { struct qca_power *bt_power; u32 init_speed; u32 oper_speed; + bool bdaddr_property_broken; const char *firmware_name; }; @@ -1843,6 +1843,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + struct qca_serdev *qcadev; const char *soc_name; ret = qca_check_speeds(hu); @@ -1904,16 +1905,11 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - /* Set BDA quirk bit for reading BDA value from fwnode property - * only if that property exist in DT. - */ - if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); - } else { - bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); - } + qcadev = serdev_device_get_drvdata(hu->serdev); + if (qcadev->bdaddr_property_broken) + set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -2295,6 +2291,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); + qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev, + "qcom,local-bd-address-broken"); + if (data) qcadev->btsoc_type = data->soc_type; else diff --git a/drivers/cache/sifive_ccache.c b/drivers/cache/sifive_ccache.c index 89ed6cd6b059..e9cc8b4786fb 100644 --- a/drivers/cache/sifive_ccache.c +++ b/drivers/cache/sifive_ccache.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -247,13 +249,49 @@ static irqreturn_t ccache_int_handler(int irq, void *device) return IRQ_HANDLED; } +static int sifive_ccache_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + unsigned long quirks; + int intr_num, rc; + + quirks = (unsigned long)device_get_match_data(dev); + + intr_num = platform_irq_count(pdev); + if (!intr_num) + return dev_err_probe(dev, -ENODEV, "No interrupts property\n"); + + for (int i = 0; i < intr_num; i++) { + if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR)) + continue; + + g_irq[i] = platform_get_irq(pdev, i); + if (g_irq[i] < 0) + return g_irq[i]; + + rc = devm_request_irq(dev, g_irq[i], ccache_int_handler, 0, "ccache_ecc", NULL); + if (rc) + return dev_err_probe(dev, rc, "Could not request IRQ %d\n", g_irq[i]); + } + + return 0; +} + +static struct platform_driver sifive_ccache_driver = { + .probe = sifive_ccache_probe, + .driver = { + .name = "sifive_ccache", + .of_match_table = sifive_ccache_ids, + }, +}; + static int __init sifive_ccache_init(void) { struct device_node *np; struct resource res; - int i, rc, intr_num; const struct of_device_id *match; unsigned long quirks; + int rc; np = of_find_matching_node_and_match(NULL, sifive_ccache_ids, &match); if (!np) @@ -277,28 +315,6 @@ static int __init sifive_ccache_init(void) goto err_unmap; } - intr_num = of_property_count_u32_elems(np, "interrupts"); - if (!intr_num) { - pr_err("No interrupts property\n"); - rc = -ENODEV; - goto err_unmap; - } - - for (i = 0; i < intr_num; i++) { - g_irq[i] = irq_of_parse_and_map(np, i); - - if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR)) - continue; - - rc = request_irq(g_irq[i], ccache_int_handler, 0, "ccache_ecc", - NULL); - if (rc) { - pr_err("Could not request IRQ %d\n", g_irq[i]); - goto err_free_irq; - } - } - of_node_put(np); - #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS if (quirks & QUIRK_NONSTANDARD_CACHE_OPS) { riscv_cbom_block_size = SIFIVE_CCACHE_LINE_SIZE; @@ -315,11 +331,15 @@ static int __init sifive_ccache_init(void) #ifdef CONFIG_DEBUG_FS setup_sifive_debug(); #endif + + rc = platform_driver_register(&sifive_ccache_driver); + if (rc) + goto err_unmap; + + of_node_put(np); + return 0; -err_free_irq: - while (--i >= 0) - free_irq(g_irq[i], NULL); err_unmap: iounmap(ccache_base); err_node_put: diff --git a/drivers/char/random.c b/drivers/char/random.c index 456be28ba67c..2597cb43f438 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -702,7 +702,7 @@ static void extract_entropy(void *buf, size_t len) static void __cold _credit_init_bits(size_t bits) { - static struct execute_work set_ready; + static DECLARE_WORK(set_ready, crng_set_ready); unsigned int new, orig, add; unsigned long flags; @@ -718,8 +718,8 @@ static void __cold _credit_init_bits(size_t bits) if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */ - if (static_key_initialized) - execute_in_process_context(crng_set_ready, &set_ready); + if (static_key_initialized && system_unbound_wq) + queue_work(system_unbound_wq, &set_ready); atomic_notifier_call_chain(&random_ready_notifier, 0, NULL); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); @@ -890,8 +890,8 @@ void __init random_init(void) /* * If we were initialized by the cpu or bootloader before jump labels - * are initialized, then we should enable the static branch here, where - * it's guaranteed that jump labels have been initialized. + * or workqueues are initialized, then we should enable the static + * branch here, where it's guaranteed that these have been initialized. */ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY) crng_set_ready(NULL); diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 25371c91a58f..8cca52be993f 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -37,6 +37,10 @@ static HLIST_HEAD(clk_root_list); static HLIST_HEAD(clk_orphan_list); static LIST_HEAD(clk_notifier_list); +/* List of registered clks that use runtime PM */ +static HLIST_HEAD(clk_rpm_list); +static DEFINE_MUTEX(clk_rpm_list_lock); + static const struct hlist_head *all_lists[] = { &clk_root_list, &clk_orphan_list, @@ -59,6 +63,7 @@ struct clk_core { struct clk_hw *hw; struct module *owner; struct device *dev; + struct hlist_node rpm_node; struct device_node *of_node; struct clk_core *parent; struct clk_parent_map *parents; @@ -122,6 +127,89 @@ static void clk_pm_runtime_put(struct clk_core *core) pm_runtime_put_sync(core->dev); } +/** + * clk_pm_runtime_get_all() - Runtime "get" all clk provider devices + * + * Call clk_pm_runtime_get() on all runtime PM enabled clks in the clk tree so + * that disabling unused clks avoids a deadlock where a device is runtime PM + * resuming/suspending and the runtime PM callback is trying to grab the + * prepare_lock for something like clk_prepare_enable() while + * clk_disable_unused_subtree() holds the prepare_lock and is trying to runtime + * PM resume/suspend the device as well. + * + * Context: Acquires the 'clk_rpm_list_lock' and returns with the lock held on + * success. Otherwise the lock is released on failure. + * + * Return: 0 on success, negative errno otherwise. + */ +static int clk_pm_runtime_get_all(void) +{ + int ret; + struct clk_core *core, *failed; + + /* + * Grab the list lock to prevent any new clks from being registered + * or unregistered until clk_pm_runtime_put_all(). + */ + mutex_lock(&clk_rpm_list_lock); + + /* + * Runtime PM "get" all the devices that are needed for the clks + * currently registered. Do this without holding the prepare_lock, to + * avoid the deadlock. + */ + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { + ret = clk_pm_runtime_get(core); + if (ret) { + failed = core; + pr_err("clk: Failed to runtime PM get '%s' for clk '%s'\n", + dev_name(failed->dev), failed->name); + goto err; + } + } + + return 0; + +err: + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { + if (core == failed) + break; + + clk_pm_runtime_put(core); + } + mutex_unlock(&clk_rpm_list_lock); + + return ret; +} + +/** + * clk_pm_runtime_put_all() - Runtime "put" all clk provider devices + * + * Put the runtime PM references taken in clk_pm_runtime_get_all() and release + * the 'clk_rpm_list_lock'. + */ +static void clk_pm_runtime_put_all(void) +{ + struct clk_core *core; + + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) + clk_pm_runtime_put(core); + mutex_unlock(&clk_rpm_list_lock); +} + +static void clk_pm_runtime_init(struct clk_core *core) +{ + struct device *dev = core->dev; + + if (dev && pm_runtime_enabled(dev)) { + core->rpm_enabled = true; + + mutex_lock(&clk_rpm_list_lock); + hlist_add_head(&core->rpm_node, &clk_rpm_list); + mutex_unlock(&clk_rpm_list_lock); + } +} + /*** locking ***/ static void clk_prepare_lock(void) { @@ -1381,9 +1469,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core) if (core->flags & CLK_IGNORE_UNUSED) return; - if (clk_pm_runtime_get(core)) - return; - if (clk_core_is_prepared(core)) { trace_clk_unprepare(core); if (core->ops->unprepare_unused) @@ -1392,8 +1477,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core) core->ops->unprepare(core->hw); trace_clk_unprepare_complete(core); } - - clk_pm_runtime_put(core); } static void __init clk_disable_unused_subtree(struct clk_core *core) @@ -1409,9 +1492,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core) if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_prepare_enable(core->parent); - if (clk_pm_runtime_get(core)) - goto unprepare_out; - flags = clk_enable_lock(); if (core->enable_count) @@ -1436,8 +1516,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core) unlock_out: clk_enable_unlock(flags); - clk_pm_runtime_put(core); -unprepare_out: if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_disable_unprepare(core->parent); } @@ -1453,6 +1531,7 @@ __setup("clk_ignore_unused", clk_ignore_unused_setup); static int __init clk_disable_unused(void) { struct clk_core *core; + int ret; if (clk_ignore_unused) { pr_warn("clk: Not disabling unused clocks\n"); @@ -1461,6 +1540,13 @@ static int __init clk_disable_unused(void) pr_info("clk: Disabling unused clocks\n"); + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; + /* + * Grab the prepare lock to keep the clk topology stable while iterating + * over clks. + */ clk_prepare_lock(); hlist_for_each_entry(core, &clk_root_list, child_node) @@ -1477,6 +1563,8 @@ static int __init clk_disable_unused(void) clk_prepare_unlock(); + clk_pm_runtime_put_all(); + return 0; } late_initcall_sync(clk_disable_unused); @@ -3252,9 +3340,7 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, { struct clk_core *child; - clk_pm_runtime_get(c); clk_summary_show_one(s, c, level); - clk_pm_runtime_put(c); hlist_for_each_entry(child, &c->children, child_node) clk_summary_show_subtree(s, child, level + 1); @@ -3264,11 +3350,15 @@ static int clk_summary_show(struct seq_file *s, void *data) { struct clk_core *c; struct hlist_head **lists = s->private; + int ret; seq_puts(s, " enable prepare protect duty hardware connection\n"); seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; clk_prepare_lock(); @@ -3277,6 +3367,7 @@ static int clk_summary_show(struct seq_file *s, void *data) clk_summary_show_subtree(s, c, 0); clk_prepare_unlock(); + clk_pm_runtime_put_all(); return 0; } @@ -3324,8 +3415,14 @@ static int clk_dump_show(struct seq_file *s, void *data) struct clk_core *c; bool first_node = true; struct hlist_head **lists = s->private; + int ret; + + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; seq_putc(s, '{'); + clk_prepare_lock(); for (; *lists; lists++) { @@ -3338,6 +3435,7 @@ static int clk_dump_show(struct seq_file *s, void *data) } clk_prepare_unlock(); + clk_pm_runtime_put_all(); seq_puts(s, "}\n"); return 0; @@ -3981,8 +4079,6 @@ static int __clk_core_init(struct clk_core *core) } clk_core_reparent_orphans_nolock(); - - kref_init(&core->ref); out: clk_pm_runtime_put(core); unlock: @@ -4211,6 +4307,22 @@ static void clk_core_free_parent_map(struct clk_core *core) kfree(core->parents); } +/* Free memory allocated for a struct clk_core */ +static void __clk_release(struct kref *ref) +{ + struct clk_core *core = container_of(ref, struct clk_core, ref); + + if (core->rpm_enabled) { + mutex_lock(&clk_rpm_list_lock); + hlist_del(&core->rpm_node); + mutex_unlock(&clk_rpm_list_lock); + } + + clk_core_free_parent_map(core); + kfree_const(core->name); + kfree(core); +} + static struct clk * __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) { @@ -4231,6 +4343,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) goto fail_out; } + kref_init(&core->ref); + core->name = kstrdup_const(init->name, GFP_KERNEL); if (!core->name) { ret = -ENOMEM; @@ -4243,9 +4357,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) } core->ops = init->ops; - if (dev && pm_runtime_enabled(dev)) - core->rpm_enabled = true; core->dev = dev; + clk_pm_runtime_init(core); core->of_node = np; if (dev && dev->driver) core->owner = dev->driver->owner; @@ -4285,12 +4398,10 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) hw->clk = NULL; fail_create_clk: - clk_core_free_parent_map(core); fail_parents: fail_ops: - kfree_const(core->name); fail_name: - kfree(core); + kref_put(&core->ref, __clk_release); fail_out: return ERR_PTR(ret); } @@ -4370,18 +4481,6 @@ int of_clk_hw_register(struct device_node *node, struct clk_hw *hw) } EXPORT_SYMBOL_GPL(of_clk_hw_register); -/* Free memory allocated for a clock. */ -static void __clk_release(struct kref *ref) -{ - struct clk_core *core = container_of(ref, struct clk_core, ref); - - lockdep_assert_held(&prepare_lock); - - clk_core_free_parent_map(core); - kfree_const(core->name); - kfree(core); -} - /* * Empty clk_ops for unregistered clocks. These are used temporarily * after clk_unregister() was called on a clock and until last clock @@ -4472,7 +4571,8 @@ void clk_unregister(struct clk *clk) if (ops == &clk_nodrv_ops) { pr_err("%s: unregistered clock: %s\n", __func__, clk->core->name); - goto unlock; + clk_prepare_unlock(); + return; } /* * Assign empty clock ops for consumers that might still hold @@ -4506,11 +4606,10 @@ void clk_unregister(struct clk *clk) if (clk->core->protect_count) pr_warn("%s: unregistering protected clock: %s\n", __func__, clk->core->name); + clk_prepare_unlock(); kref_put(&clk->core->ref, __clk_release); free_clk(clk); -unlock: - clk_prepare_unlock(); } EXPORT_SYMBOL_GPL(clk_unregister); @@ -4669,13 +4768,11 @@ void __clk_put(struct clk *clk) if (clk->min_rate > 0 || clk->max_rate < ULONG_MAX) clk_set_rate_range_nolock(clk, 0, ULONG_MAX); - owner = clk->core->owner; - kref_put(&clk->core->ref, __clk_release); - clk_prepare_unlock(); + owner = clk->core->owner; + kref_put(&clk->core->ref, __clk_release); module_put(owner); - free_clk(clk); } diff --git a/drivers/clk/mediatek/clk-mt7988-infracfg.c b/drivers/clk/mediatek/clk-mt7988-infracfg.c index 449041f8abbc..c8c023afe3e5 100644 --- a/drivers/clk/mediatek/clk-mt7988-infracfg.c +++ b/drivers/clk/mediatek/clk-mt7988-infracfg.c @@ -156,7 +156,7 @@ static const struct mtk_gate infra_clks[] = { GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P1, "infra_pcie_peri_ck_26m_ck_p1", "csw_infra_f26m_sel", 8), GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P2, "infra_pcie_peri_ck_26m_ck_p2", - "csw_infra_f26m_sel", 9), + "infra_pcie_peri_ck_26m_ck_p3", 9), GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P3, "infra_pcie_peri_ck_26m_ck_p3", "csw_infra_f26m_sel", 10), /* INFRA1 */ diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index 2e55368dc4d8..bd37ab4d1a9b 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "clk-mtk.h" @@ -494,6 +495,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, return IS_ERR(base) ? PTR_ERR(base) : -ENOMEM; } + + devm_pm_runtime_enable(&pdev->dev); + /* + * Do a pm_runtime_resume_and_get() to workaround a possible + * deadlock between clk_register() and the genpd framework. + */ + r = pm_runtime_resume_and_get(&pdev->dev); + if (r) + return r; + /* Calculate how many clk_hw_onecell_data entries to allocate */ num_clks = mcd->num_clks + mcd->num_composite_clks; num_clks += mcd->num_fixed_clks + mcd->num_factor_clks; @@ -574,6 +585,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, goto unregister_clks; } + pm_runtime_put(&pdev->dev); + return r; unregister_clks: @@ -604,6 +617,8 @@ free_data: free_base: if (mcd->shared_io && base) iounmap(base); + + pm_runtime_put(&pdev->dev); return r; } diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c index 4536ed43f65b..84dce5184a77 100644 --- a/drivers/comedi/drivers/vmk80xx.c +++ b/drivers/comedi/drivers/vmk80xx.c @@ -641,32 +641,21 @@ static int vmk80xx_find_usb_endpoints(struct comedi_device *dev) struct vmk80xx_private *devpriv = dev->private; struct usb_interface *intf = comedi_to_usb_interface(dev); struct usb_host_interface *iface_desc = intf->cur_altsetting; - struct usb_endpoint_descriptor *ep_desc; - int i; + struct usb_endpoint_descriptor *ep_rx_desc, *ep_tx_desc; + int ret; - if (iface_desc->desc.bNumEndpoints != 2) + if (devpriv->model == VMK8061_MODEL) + ret = usb_find_common_endpoints(iface_desc, &ep_rx_desc, + &ep_tx_desc, NULL, NULL); + else + ret = usb_find_common_endpoints(iface_desc, NULL, NULL, + &ep_rx_desc, &ep_tx_desc); + + if (ret) return -ENODEV; - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - ep_desc = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_int_in(ep_desc) || - usb_endpoint_is_bulk_in(ep_desc)) { - if (!devpriv->ep_rx) - devpriv->ep_rx = ep_desc; - continue; - } - - if (usb_endpoint_is_int_out(ep_desc) || - usb_endpoint_is_bulk_out(ep_desc)) { - if (!devpriv->ep_tx) - devpriv->ep_tx = ep_desc; - continue; - } - } - - if (!devpriv->ep_rx || !devpriv->ep_tx) - return -ENODEV; + devpriv->ep_rx = ep_rx_desc; + devpriv->ep_tx = ep_tx_desc; if (!usb_endpoint_maxp(devpriv->ep_rx) || !usb_endpoint_maxp(devpriv->ep_tx)) return -EINVAL; diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index f44efbb89c34..2102377f727b 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1090,7 +1090,7 @@ static int __sev_snp_init_locked(int *error) void *arg = &data; int cmd, rc = 0; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; sev = psp->sev_data; diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 1cd304de5388..b2191ade9011 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -806,6 +806,8 @@ static int save_iaa_wq(struct idxd_wq *wq) return -EINVAL; cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; + if (!cpus_per_iaa) + cpus_per_iaa = 1; out: return 0; } @@ -821,10 +823,12 @@ static void remove_iaa_wq(struct idxd_wq *wq) } } - if (nr_iaa) + if (nr_iaa) { cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; - else - cpus_per_iaa = 0; + if (!cpus_per_iaa) + cpus_per_iaa = 1; + } else + cpus_per_iaa = 1; } static int wq_table_add_wqs(int iaa, int cpu) diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 67998dbd1d46..5f3c9c5529b9 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -144,17 +144,4 @@ config CXL_REGION_INVALIDATION_TEST If unsure, or if this kernel is meant for production environments, say N. -config CXL_PMU - tristate "CXL Performance Monitoring Unit" - default CXL_BUS - depends on PERF_EVENTS - help - Support performance monitoring as defined in CXL rev 3.0 - section 13.2: Performance Monitoring. CXL components may have - one or more CXL Performance Monitoring Units (CPMUs). - - Say 'y/m' to enable a driver that will attach to performance - monitoring units and provide standard perf based interfaces. - - If unsure say 'm'. endif diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index af5cb818f84d..cb8c155a2c9b 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -525,22 +525,11 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport) { struct acpi_device *hb = to_cxl_host_bridge(NULL, dev); u32 uid; - int rc; if (kstrtou32(acpi_device_uid(hb), 0, &uid)) return -EINVAL; - rc = acpi_get_genport_coordinates(uid, dport->hb_coord); - if (rc < 0) - return rc; - - /* Adjust back to picoseconds from nanoseconds */ - for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { - dport->hb_coord[i].read_latency *= 1000; - dport->hb_coord[i].write_latency *= 1000; - } - - return 0; + return acpi_get_genport_coordinates(uid, dport->coord); } static int add_host_bridge_dport(struct device *match, void *arg) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index eddbbe21450c..bb83867d9fec 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -14,12 +14,42 @@ struct dsmas_entry { struct range dpa_range; u8 handle; - struct access_coordinate coord; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; int entries; int qos_class; }; +static u32 cdat_normalize(u16 entry, u64 base, u8 type) +{ + u32 value; + + /* + * Check for invalid and overflow values + */ + if (entry == 0xffff || !entry) + return 0; + else if (base > (UINT_MAX / (entry))) + return 0; + + /* + * CDAT fields follow the format of HMAT fields. See table 5 Device + * Scoped Latency and Bandwidth Information Structure in Coherent Device + * Attribute Table (CDAT) Specification v1.01. + */ + value = entry * base; + switch (type) { + case ACPI_HMAT_ACCESS_LATENCY: + case ACPI_HMAT_READ_LATENCY: + case ACPI_HMAT_WRITE_LATENCY: + value = DIV_ROUND_UP(value, 1000); + break; + default: + break; + } + return value; +} + static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, const unsigned long end) { @@ -58,8 +88,8 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, return 0; } -static void cxl_access_coordinate_set(struct access_coordinate *coord, - int access, unsigned int val) +static void __cxl_access_coordinate_set(struct access_coordinate *coord, + int access, unsigned int val) { switch (access) { case ACPI_HMAT_ACCESS_LATENCY: @@ -85,6 +115,13 @@ static void cxl_access_coordinate_set(struct access_coordinate *coord, } } +static void cxl_access_coordinate_set(struct access_coordinate *coord, + int access, unsigned int val) +{ + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) + __cxl_access_coordinate_set(&coord[i], access, val); +} + static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg, const unsigned long end) { @@ -97,7 +134,6 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg, __le16 le_val; u64 val; u16 len; - int rc; len = le16_to_cpu((__force __le16)hdr->length); if (len != size || (unsigned long)hdr + len > end) { @@ -124,12 +160,10 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg, le_base = (__force __le64)dslbis->entry_base_unit; le_val = (__force __le16)dslbis->entry[0]; - rc = check_mul_overflow(le64_to_cpu(le_base), - le16_to_cpu(le_val), &val); - if (rc) - pr_warn("DSLBIS value overflowed.\n"); + val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base), + dslbis->data_type); - cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val); + cxl_access_coordinate_set(dent->coord, dslbis->data_type, val); return 0; } @@ -163,25 +197,18 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port, static int cxl_port_perf_data_calculate(struct cxl_port *port, struct xarray *dsmas_xa) { - struct access_coordinate ep_c; - struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate ep_c[ACCESS_COORDINATE_MAX]; struct dsmas_entry *dent; int valid_entries = 0; unsigned long index; int rc; - rc = cxl_endpoint_get_perf_coordinates(port, &ep_c); + rc = cxl_endpoint_get_perf_coordinates(port, ep_c); if (rc) { dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n"); return rc; } - rc = cxl_hb_get_perf_coordinates(port, coord); - if (rc) { - dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n"); - return rc; - } - struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port); if (!cxl_root) @@ -193,18 +220,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, xa_for_each(dsmas_xa, index, dent) { int qos_class; - cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c); - /* - * Keeping the host bridge coordinates separate from the dsmas - * coordinates in order to allow calculation of access class - * 0 and 1 for region later. - */ - cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU], - &coord[ACCESS_COORDINATE_CPU], - &dent->coord); + cxl_coordinates_combine(dent->coord, dent->coord, ep_c); dent->entries = 1; rc = cxl_root->ops->qos_class(cxl_root, - &coord[ACCESS_COORDINATE_CPU], + &dent->coord[ACCESS_COORDINATE_CPU], 1, &qos_class); if (rc != 1) continue; @@ -222,14 +241,17 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, static void update_perf_entry(struct device *dev, struct dsmas_entry *dent, struct cxl_dpa_perf *dpa_perf) { + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) + dpa_perf->coord[i] = dent->coord[i]; dpa_perf->dpa_range = dent->dpa_range; - dpa_perf->coord = dent->coord; dpa_perf->qos_class = dent->qos_class; dev_dbg(dev, "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n", dent->dpa_range.start, dpa_perf->qos_class, - dent->coord.read_bandwidth, dent->coord.write_bandwidth, - dent->coord.read_latency, dent->coord.write_latency); + dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth, + dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth, + dent->coord[ACCESS_COORDINATE_CPU].read_latency, + dent->coord[ACCESS_COORDINATE_CPU].write_latency); } static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, @@ -461,17 +483,16 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, le_base = (__force __le64)tbl->sslbis_header.entry_base_unit; le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth; - - if (check_mul_overflow(le64_to_cpu(le_base), - le16_to_cpu(le_val), &val)) - dev_warn(dev, "SSLBIS value overflowed!\n"); + val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base), + sslbis->data_type); xa_for_each(&port->dports, index, dport) { if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || - dsp_id == dport->port_id) - cxl_access_coordinate_set(&dport->sw_coord, + dsp_id == dport->port_id) { + cxl_access_coordinate_set(dport->coord, sslbis->data_type, val); + } } } @@ -493,16 +514,9 @@ void cxl_switch_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL); -/** - * cxl_coordinates_combine - Combine the two input coordinates - * - * @out: Output coordinate of c1 and c2 combined - * @c1: input coordinates - * @c2: input coordinates - */ -void cxl_coordinates_combine(struct access_coordinate *out, - struct access_coordinate *c1, - struct access_coordinate *c2) +static void __cxl_coordinates_combine(struct access_coordinate *out, + struct access_coordinate *c1, + struct access_coordinate *c2) { if (c1->write_bandwidth && c2->write_bandwidth) out->write_bandwidth = min(c1->write_bandwidth, @@ -515,23 +529,34 @@ void cxl_coordinates_combine(struct access_coordinate *out, out->read_latency = c1->read_latency + c2->read_latency; } +/** + * cxl_coordinates_combine - Combine the two input coordinates + * + * @out: Output coordinate of c1 and c2 combined + * @c1: input coordinates + * @c2: input coordinates + */ +void cxl_coordinates_combine(struct access_coordinate *out, + struct access_coordinate *c1, + struct access_coordinate *c2) +{ + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) + __cxl_coordinates_combine(&out[i], &c1[i], &c2[i]); +} + MODULE_IMPORT_NS(CXL); void cxl_region_perf_data_calculate(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *port = cxlmd->endpoint; struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX]; - struct access_coordinate coord; struct range dpa = { .start = cxled->dpa_res->start, .end = cxled->dpa_res->end, }; struct cxl_dpa_perf *perf; - int rc; switch (cxlr->mode) { case CXL_DECODER_RAM: @@ -549,35 +574,16 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, if (!range_contains(&perf->dpa_range, &dpa)) return; - rc = cxl_hb_get_perf_coordinates(port, hb_coord); - if (rc) { - dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n"); - return; - } - for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { - /* Pickup the host bridge coords */ - cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord); - /* Get total bandwidth and the worst latency for the cxl region */ cxlr->coord[i].read_latency = max_t(unsigned int, cxlr->coord[i].read_latency, - coord.read_latency); + perf->coord[i].read_latency); cxlr->coord[i].write_latency = max_t(unsigned int, cxlr->coord[i].write_latency, - coord.write_latency); - cxlr->coord[i].read_bandwidth += coord.read_bandwidth; - cxlr->coord[i].write_bandwidth += coord.write_bandwidth; - - /* - * Convert latency to nanosec from picosec to be consistent - * with the resulting latency coordinates computed by the - * HMAT_REPORTING code. - */ - cxlr->coord[i].read_latency = - DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000); - cxlr->coord[i].write_latency = - DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000); + perf->coord[i].write_latency); + cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth; + cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth; } } diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 9adda4795eb7..f0f54aeccc87 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds, payload->handles[i++] = gen->hdr.handle; dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log, - le16_to_cpu(payload->handles[i])); + le16_to_cpu(payload->handles[i - 1])); if (i == max_handles) { payload->nr_recs = i; @@ -958,13 +958,14 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, .payload_in = &log_type, .size_in = sizeof(log_type), .payload_out = payload, - .size_out = mds->payload_size, .min_out = struct_size(payload, records, 0), }; do { int rc, i; + mbox_cmd.size_out = mds->payload_size; + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) { dev_err_ratelimited(dev, diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2b0cab556072..762783bb091a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2133,36 +2133,44 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) } EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); -/** - * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator - * and host bridge - * - * @port: endpoint cxl_port - * @coord: output access coordinates - * - * Return: errno on failure, 0 on success. - */ -int cxl_hb_get_perf_coordinates(struct cxl_port *port, - struct access_coordinate *coord) +static void add_latency(struct access_coordinate *c, long latency) { - struct cxl_port *iter = port; - struct cxl_dport *dport; + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + c[i].write_latency += latency; + c[i].read_latency += latency; + } +} - if (!is_cxl_endpoint(port)) - return -EINVAL; - - dport = iter->parent_dport; - while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { - iter = to_cxl_port(iter->dev.parent); - dport = iter->parent_dport; +static bool coordinates_valid(struct access_coordinate *c) +{ + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + if (c[i].read_bandwidth && c[i].write_bandwidth && + c[i].read_latency && c[i].write_latency) + continue; + return false; } - coord[ACCESS_COORDINATE_LOCAL] = - dport->hb_coord[ACCESS_COORDINATE_LOCAL]; - coord[ACCESS_COORDINATE_CPU] = - dport->hb_coord[ACCESS_COORDINATE_CPU]; + return true; +} - return 0; +static void set_min_bandwidth(struct access_coordinate *c, unsigned int bw) +{ + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + c[i].write_bandwidth = min(c[i].write_bandwidth, bw); + c[i].read_bandwidth = min(c[i].read_bandwidth, bw); + } +} + +static void set_access_coordinates(struct access_coordinate *out, + struct access_coordinate *in) +{ + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) + out[i] = in[i]; +} + +static bool parent_port_is_cxl_root(struct cxl_port *port) +{ + return is_cxl_root(to_cxl_port(port->dev.parent)); } /** @@ -2176,35 +2184,53 @@ int cxl_hb_get_perf_coordinates(struct cxl_port *port, int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord) { - struct access_coordinate c = { - .read_bandwidth = UINT_MAX, - .write_bandwidth = UINT_MAX, + struct access_coordinate c[] = { + { + .read_bandwidth = UINT_MAX, + .write_bandwidth = UINT_MAX, + }, + { + .read_bandwidth = UINT_MAX, + .write_bandwidth = UINT_MAX, + }, }; struct cxl_port *iter = port; struct cxl_dport *dport; struct pci_dev *pdev; unsigned int bw; + bool is_cxl_root; if (!is_cxl_endpoint(port)) return -EINVAL; - dport = iter->parent_dport; - /* - * Exit the loop when the parent port of the current port is cxl root. - * The iterative loop starts at the endpoint and gathers the - * latency of the CXL link from the current iter to the next downstream - * port each iteration. If the parent is cxl root then there is - * nothing to gather. + * Exit the loop when the parent port of the current iter port is cxl + * root. The iterative loop starts at the endpoint and gathers the + * latency of the CXL link from the current device/port to the connected + * downstream port each iteration. */ - while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { - cxl_coordinates_combine(&c, &c, &dport->sw_coord); - c.write_latency += dport->link_latency; - c.read_latency += dport->link_latency; - - iter = to_cxl_port(iter->dev.parent); + do { dport = iter->parent_dport; - } + iter = to_cxl_port(iter->dev.parent); + is_cxl_root = parent_port_is_cxl_root(iter); + + /* + * There's no valid access_coordinate for a root port since RPs do not + * have CDAT and therefore needs to be skipped. + */ + if (!is_cxl_root) { + if (!coordinates_valid(dport->coord)) + return -EINVAL; + cxl_coordinates_combine(c, c, dport->coord); + } + add_latency(c, dport->link_latency); + } while (!is_cxl_root); + + dport = iter->parent_dport; + /* Retrieve HB coords */ + if (!coordinates_valid(dport->coord)) + return -EINVAL; + cxl_coordinates_combine(c, c, dport->coord); /* Get the calculated PCI paths bandwidth */ pdev = to_pci_dev(port->uport_dev->parent); @@ -2213,10 +2239,8 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, return -ENXIO; bw /= BITS_PER_BYTE; - c.write_bandwidth = min(c.write_bandwidth, bw); - c.read_bandwidth = min(c.read_bandwidth, bw); - - *coord = c; + set_min_bandwidth(c, bw); + set_access_coordinates(coord, c); return 0; } diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 372786f80955..3c42f984eeaf 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL); static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, struct cxl_register_map *map) { + u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); u64 offset = ((u64)reg_hi << 32) | (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); @@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, if (offset > pci_resource_len(pdev, bar)) { dev_warn(&pdev->dev, "BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar, - &pdev->resource[bar], &offset, map->reg_type); + &pdev->resource[bar], &offset, reg_type); return false; } - map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); + map->reg_type = reg_type; map->resource = pci_resource_start(pdev, bar) + offset; map->max_size = pci_resource_len(pdev, bar) - offset; return true; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 534e25e2f0a4..036d17db68e0 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -663,8 +663,7 @@ struct cxl_rcrb_info { * @rch: Indicate whether this dport was enumerated in RCH or VH mode * @port: reference to cxl_port that contains this downstream port * @regs: Dport parsed register blocks - * @sw_coord: access coordinates (performance) for switch from CDAT - * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge) + * @coord: access coordinates (bandwidth and latency performance attributes) * @link_latency: calculated PCIe downstream latency */ struct cxl_dport { @@ -675,8 +674,7 @@ struct cxl_dport { bool rch; struct cxl_port *port; struct cxl_regs regs; - struct access_coordinate sw_coord; - struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; long link_latency; }; @@ -884,8 +882,6 @@ void cxl_switch_parse_cdat(struct cxl_port *port); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); -int cxl_hb_get_perf_coordinates(struct cxl_port *port, - struct access_coordinate *coord); void cxl_region_perf_data_calculate(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 20fb3b35e89e..36cee9c30ceb 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -401,7 +401,7 @@ enum cxl_devtype { */ struct cxl_dpa_perf { struct range dpa_range; - struct access_coordinate coord; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; int qos_class; }; diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index 9c2a0c082a76..ed4b323886e4 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -84,11 +84,11 @@ static int sanitycheck(void *arg) return -ENOMEM; chain = mock_chain(NULL, f, 1); - if (!chain) + if (chain) + dma_fence_enable_sw_signaling(chain); + else err = -ENOMEM; - dma_fence_enable_sw_signaling(chain); - dma_fence_signal(f); dma_fence_put(f); diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig index a4cae73f20d3..20607ed54243 100644 --- a/drivers/dpll/Kconfig +++ b/drivers/dpll/Kconfig @@ -4,4 +4,4 @@ # config DPLL - bool + bool diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 7bc71f4be64a..38d19410a2be 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2060,6 +2060,8 @@ static void bus_reset_work(struct work_struct *work) ohci->generation = generation; reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset); + if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS) + reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset); if (ohci->quirks & QUIRK_RESET_PACKET) ohci->request_generation = generation; @@ -2125,12 +2127,14 @@ static irqreturn_t irq_handler(int irq, void *data) return IRQ_NONE; /* - * busReset and postedWriteErr must not be cleared yet + * busReset and postedWriteErr events must not be cleared yet * (OHCI 1.1 clauses 7.2.3.2 and 13.2.8.1) */ reg_write(ohci, OHCI1394_IntEventClear, event & ~(OHCI1394_busReset | OHCI1394_postedWriteErr)); log_irqs(ohci, event); + if (event & OHCI1394_busReset) + reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset); if (event & OHCI1394_selfIDComplete) queue_work(selfid_workqueue, &ohci->bus_reset_work); diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index f2556a8e9401..9bc2e10381af 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -790,7 +790,7 @@ static void ffa_notification_info_get(void) part_id = packed_id_list[ids_processed++]; - if (!ids_count[list]) { /* Global Notification */ + if (ids_count[list] == 1) { /* Global Notification */ __do_sched_recv_cb(part_id, 0, false); continue; } diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c index ea9201e7044c..1fa79bba492e 100644 --- a/drivers/firmware/arm_scmi/powercap.c +++ b/drivers/firmware/arm_scmi/powercap.c @@ -736,7 +736,7 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph, ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_PAI_GET, 4, domain, &fc[POWERCAP_FC_PAI].get_addr, NULL, - &fc[POWERCAP_PAI_GET].rate_limit); + &fc[POWERCAP_FC_PAI].rate_limit); *p_fc = fc; } diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c index 350573518503..130d13e9cd6b 100644 --- a/drivers/firmware/arm_scmi/raw_mode.c +++ b/drivers/firmware/arm_scmi/raw_mode.c @@ -921,7 +921,7 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp) rd->raw = raw; filp->private_data = rd; - return 0; + return nonseekable_open(inode, filp); } static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp) @@ -950,6 +950,7 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = { .open = scmi_dbg_raw_mode_open, .release = scmi_dbg_raw_mode_release, .write = scmi_dbg_raw_mode_reset_write, + .llseek = no_llseek, .owner = THIS_MODULE, }; @@ -959,6 +960,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = { .read = scmi_dbg_raw_mode_message_read, .write = scmi_dbg_raw_mode_message_write, .poll = scmi_dbg_raw_mode_message_poll, + .llseek = no_llseek, .owner = THIS_MODULE, }; @@ -975,6 +977,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = { .read = scmi_dbg_raw_mode_message_read, .write = scmi_dbg_raw_mode_message_async_write, .poll = scmi_dbg_raw_mode_message_poll, + .llseek = no_llseek, .owner = THIS_MODULE, }; @@ -998,6 +1001,7 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = { .release = scmi_dbg_raw_mode_release, .read = scmi_test_dbg_raw_mode_notif_read, .poll = scmi_test_dbg_raw_mode_notif_poll, + .llseek = no_llseek, .owner = THIS_MODULE, }; @@ -1021,6 +1025,7 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = { .release = scmi_dbg_raw_mode_release, .read = scmi_test_dbg_raw_mode_errors_read, .poll = scmi_test_dbg_raw_mode_errors_poll, + .llseek = no_llseek, .owner = THIS_MODULE, }; diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 7e1852859550..c41e7b2091cd 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -120,7 +120,7 @@ efi_status_t efi_random_alloc(unsigned long size, continue; } - target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align; + target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align; pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 6a6ffc6707bd..d5a8182cf2e1 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -496,6 +496,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, hdr->vid_mode = 0xffff; hdr->type_of_loader = 0x21; + hdr->initrd_addr_max = INT_MAX; /* Convert unicode cmdline to ascii */ cmdline_ptr = efi_convert_cmdline(image, &options_size); diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c index 1ee62cd58582..25db014494a4 100644 --- a/drivers/gpio/gpio-crystalcove.c +++ b/drivers/gpio/gpio-crystalcove.c @@ -92,7 +92,7 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type) case 0x5e: return GPIOPANELCTL; default: - return -EOPNOTSUPP; + return -ENOTSUPP; } } diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c index 5ef8af824980..c097e310c9e8 100644 --- a/drivers/gpio/gpio-lpc32xx.c +++ b/drivers/gpio/gpio-lpc32xx.c @@ -529,6 +529,7 @@ static const struct of_device_id lpc32xx_gpio_of_match[] = { { .compatible = "nxp,lpc3220-gpio", }, { }, }; +MODULE_DEVICE_TABLE(of, lpc32xx_gpio_of_match); static struct platform_driver lpc32xx_gpio_driver = { .driver = { diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c index c18b6b47384f..94ca9d03c094 100644 --- a/drivers/gpio/gpio-wcove.c +++ b/drivers/gpio/gpio-wcove.c @@ -104,7 +104,7 @@ static inline int to_reg(int gpio, enum ctrl_register type) unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE; if (gpio >= WCOVE_GPIO_NUM) - return -EOPNOTSUPP; + return -ENOTSUPP; return reg + gpio; } diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index f384fa278764..d09c7d728365 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -728,6 +728,25 @@ static u32 line_event_id(int level) GPIO_V2_LINE_EVENT_FALLING_EDGE; } +static inline char *make_irq_label(const char *orig) +{ + char *new; + + if (!orig) + return NULL; + + new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL); + if (!new) + return ERR_PTR(-ENOMEM); + + return new; +} + +static inline void free_irq_label(const char *label) +{ + kfree(label); +} + #ifdef CONFIG_HTE static enum hte_return process_hw_ts_thread(void *p) @@ -1015,6 +1034,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) { unsigned long irqflags; int ret, level, irq; + char *label; /* try hardware */ ret = gpiod_set_debounce(line->desc, debounce_period_us); @@ -1037,11 +1057,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) if (irq < 0) return -ENXIO; + label = make_irq_label(line->req->label); + if (IS_ERR(label)) + return -ENOMEM; + irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING; ret = request_irq(irq, debounce_irq_handler, irqflags, - line->req->label, line); - if (ret) + label, line); + if (ret) { + free_irq_label(label); return ret; + } line->irq = irq; } else { ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH); @@ -1086,7 +1112,7 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc, static void edge_detector_stop(struct line *line) { if (line->irq) { - free_irq(line->irq, line); + free_irq_label(free_irq(line->irq, line)); line->irq = 0; } @@ -1110,6 +1136,7 @@ static int edge_detector_setup(struct line *line, unsigned long irqflags = 0; u64 eflags; int irq, ret; + char *label; eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS; if (eflags && !kfifo_initialized(&line->req->events)) { @@ -1146,11 +1173,17 @@ static int edge_detector_setup(struct line *line, IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING; irqflags |= IRQF_ONESHOT; + label = make_irq_label(line->req->label); + if (IS_ERR(label)) + return PTR_ERR(label); + /* Request a thread to read the events */ ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread, - irqflags, line->req->label, line); - if (ret) + irqflags, label, line); + if (ret) { + free_irq_label(label); return ret; + } line->irq = irq; return 0; @@ -1973,7 +2006,7 @@ static void lineevent_free(struct lineevent_state *le) blocking_notifier_chain_unregister(&le->gdev->device_notifier, &le->device_unregistered_nb); if (le->irq) - free_irq(le->irq, le); + free_irq_label(free_irq(le->irq, le)); if (le->desc) gpiod_free(le->desc); kfree(le->label); @@ -2114,6 +2147,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) int fd; int ret; int irq, irqflags = 0; + char *label; if (copy_from_user(&eventreq, ip, sizeof(eventreq))) return -EFAULT; @@ -2198,15 +2232,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_le; + label = make_irq_label(le->label); + if (IS_ERR(label)) { + ret = PTR_ERR(label); + goto out_free_le; + } + /* Request a thread to read the events */ ret = request_threaded_irq(irq, lineevent_irq_handler, lineevent_irq_thread, irqflags, - le->label, + label, le); - if (ret) + if (ret) { + free_irq_label(label); goto out_free_le; + } le->irq = irq; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index ce94e37bcbee..94903fc1c145 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1175,6 +1175,9 @@ struct gpio_device *gpio_device_find(const void *data, list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { + if (!device_is_registered(&gdev->dev)) + continue; + guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); @@ -2397,6 +2400,11 @@ char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset) } EXPORT_SYMBOL_GPL(gpiochip_dup_line_label); +static inline const char *function_name_or_default(const char *con_id) +{ + return con_id ?: "(default)"; +} + /** * gpiochip_request_own_desc - Allow GPIO chip to request its own descriptor * @gc: GPIO chip @@ -2425,10 +2433,11 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags) { struct gpio_desc *desc = gpiochip_get_desc(gc, hwnum); + const char *name = function_name_or_default(label); int ret; if (IS_ERR(desc)) { - chip_err(gc, "failed to get GPIO descriptor\n"); + chip_err(gc, "failed to get GPIO %s descriptor\n", name); return desc; } @@ -2438,8 +2447,8 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, ret = gpiod_configure_flags(desc, label, lflags, dflags); if (ret) { - chip_err(gc, "setup of own GPIO %s failed\n", label); gpiod_free_commit(desc); + chip_err(gc, "setup of own GPIO %s failed\n", name); return ERR_PTR(ret); } @@ -4153,19 +4162,17 @@ static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode, enum gpiod_flags *flags, unsigned long *lookupflags) { + const char *name = function_name_or_default(con_id); struct gpio_desc *desc = ERR_PTR(-ENOENT); if (is_of_node(fwnode)) { - dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", - fwnode, con_id); + dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags); } else if (is_acpi_node(fwnode)) { - dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", - fwnode, con_id); + dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags); } else if (is_software_node(fwnode)) { - dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", - fwnode, con_id); + dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags); } @@ -4181,6 +4188,7 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer, bool platform_lookup_allowed) { unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT; + const char *name = function_name_or_default(con_id); /* * scoped_guard() is implemented as a for loop, meaning static * analyzers will complain about these two not being initialized. @@ -4203,8 +4211,7 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer, } if (IS_ERR(desc)) { - dev_dbg(consumer, "No GPIO consumer %s found\n", - con_id); + dev_dbg(consumer, "No GPIO consumer %s found\n", name); return desc; } @@ -4226,15 +4233,14 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer, * * FIXME: Make this more sane and safe. */ - dev_info(consumer, - "nonexclusive access to GPIO for %s\n", con_id); + dev_info(consumer, "nonexclusive access to GPIO for %s\n", name); return desc; } ret = gpiod_configure_flags(desc, con_id, lookupflags, flags); if (ret < 0) { - dev_dbg(consumer, "setup of GPIO %s failed\n", con_id); gpiod_put(desc); + dev_dbg(consumer, "setup of GPIO %s failed\n", name); return ERR_PTR(ret); } @@ -4350,6 +4356,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_optional); int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, unsigned long lflags, enum gpiod_flags dflags) { + const char *name = function_name_or_default(con_id); int ret; if (lflags & GPIO_ACTIVE_LOW) @@ -4393,7 +4400,7 @@ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, /* No particular flag request, return here... */ if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) { - gpiod_dbg(desc, "no flags found for %s\n", con_id); + gpiod_dbg(desc, "no flags found for GPIO %s\n", name); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9c62552bec34..b3b84647207e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -210,6 +210,7 @@ extern int amdgpu_async_gfx_ring; extern int amdgpu_mcbp; extern int amdgpu_discovery; extern int amdgpu_mes; +extern int amdgpu_mes_log_enable; extern int amdgpu_mes_kiq; extern int amdgpu_noretry; extern int amdgpu_force_asic_type; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0a4b09709cfb..ec888fc6ead8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -819,7 +819,7 @@ retry: p->bytes_moved += ctx.bytes_moved; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5dc24c971b41..7753a2e64d41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4135,18 +4135,22 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ip_blocks[i].status.hw = true; } } + } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + !amdgpu_device_has_display_hardware(adev)) { + r = psp_gpu_reset(adev); } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - if (r) { - dev_err(adev->dev, "asic reset on init failed\n"); - goto failed; - } + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + } + + if (r) { + dev_err(adev->dev, "asic reset on init failed\n"); + goto failed; } } @@ -4539,6 +4543,8 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) goto unprepare; + flush_delayed_work(&adev->gfx.gfx_off_delay_work); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a07e4b87d4ca..ac5bf01fe8d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1896,6 +1896,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; default: @@ -2237,6 +2238,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): if (amdgpu_umsch_mm & 0x1) { amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); adev->enable_umsch_mm = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 80b9642f2bc4..e4277298cf1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -195,6 +195,7 @@ int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; +int amdgpu_mes_log_enable = 0; int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; @@ -667,6 +668,15 @@ MODULE_PARM_DESC(mes, "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)"); module_param_named(mes, amdgpu_mes, int, 0444); +/** + * DOC: mes_log_enable (int) + * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes_log_enable, + "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)"); +module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444); + /** * DOC: mes_kiq (int) * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4b3000c21ef2..e4742b65032d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -304,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) dma_fence_set_error(finished, -ECANCELED); if (finished->error < 0) { - DRM_INFO("Skip scheduling IBs!\n"); + dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)", + ring->name); } else { r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, &fence); if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, + "Error scheduling IBs (%d) in ring(%s)", r, + ring->name); } job->job_run_counter++; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index a98e03e0a51f..a00cf4756ad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -102,7 +102,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) { int r; - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + if (!amdgpu_mes_log_enable) + return 0; + + r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -1549,12 +1552,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, PAGE_SIZE, false); + mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); return 0; } - DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); #endif @@ -1565,7 +1567,7 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; struct dentry *root = minor->debugfs_root; - if (adev->enable_mes) + if (adev->enable_mes && amdgpu_mes_log_enable) debugfs_create_file("amdgpu_mes_event_log", 0444, root, adev, &amdgpu_debugfs_mes_event_log_fops); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 7d4f93fea937..4c8fc3117ef8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 010b0cb7693c..2099159a693f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -617,8 +617,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, return r; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - bo->tbo.resource->mem_type == TTM_PL_VRAM && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, ctx.bytes_moved); else @@ -1272,23 +1271,25 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_resource *res = bo->tbo.resource; uint64_t size = amdgpu_bo_size(bo); struct drm_gem_object *obj; unsigned int domain; bool shared; /* Abort if the BO doesn't currently have a backing store */ - if (!bo->tbo.resource) + if (!res) return; obj = &bo->tbo.base; shared = drm_gem_object_is_shared_for_memory_stats(obj); - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); + domain = amdgpu_mem_type_to_domain(res->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: stats->vram += size; - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) stats->visible_vram += size; if (shared) stats->vram_shared += size; @@ -1389,10 +1390,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - if (bo->resource->mem_type != TTM_PL_VRAM) - return 0; - - if (amdgpu_bo_in_cpu_visible_vram(abo)) + if (amdgpu_res_cpu_visible(adev, bo->resource)) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -1415,7 +1413,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* this should never happen */ if (bo->resource->mem_type == TTM_PL_VRAM && - !amdgpu_bo_in_cpu_visible_vram(abo)) + !amdgpu_res_cpu_visible(adev, bo->resource)) return VM_FAULT_SIGBUS; ttm_bo_move_to_lru_tail_unlocked(bo); @@ -1579,6 +1577,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, */ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct dma_buf_attachment *attachment; struct dma_buf *dma_buf; const char *placement; @@ -1587,10 +1586,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) if (dma_resv_trylock(bo->tbo.base.resv)) { unsigned int domain; + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) placement = "VRAM VISIBLE"; else placement = "VRAM"; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index be679c42b0b8..fa03d9e4874c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -250,28 +250,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) return drm_vma_node_offset_addr(&bo->tbo.base.vma_node); } -/** - * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM - */ -static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_res_cursor cursor; - - if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) - return false; - - amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); - while (cursor.remaining) { - if (cursor.start < adev->gmc.visible_vram_size) - return true; - - amdgpu_res_next(&cursor, cursor.size); - } - - return false; -} - /** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5505d646f43a..06f0a6534a94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -524,46 +524,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; volatile u32 *mqd; - int r; + u32 *kbuf; + int r, i; uint32_t value, result; if (*pos & 3 || size & 3) return -EINVAL; - result = 0; + kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - return r; + goto err_free; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } + if (r) + goto err_unreserve; + /* + * Copy to local buffer to avoid put_user(), which might fault + * and acquire mmap_sem, under reservation_ww_class_mutex. + */ + for (i = 0; i < ring->mqd_size/sizeof(u32); i++) + kbuf[i] = mqd[i]; + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + result = 0; while (size) { if (*pos >= ring->mqd_size) - goto done; + break; - value = mqd[*pos/4]; + value = kbuf[*pos/4]; r = put_user(value, (uint32_t *)buf); if (r) - goto done; + goto err_free; buf += 4; result += 4; size -= 4; *pos += 4; } -done: - amdgpu_bo_kunmap(ring->mqd_obj); - mqd = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - return r; - + kfree(kbuf); return result; + +err_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +err_free: + kfree(kbuf); + return r; } static const struct file_operations amdgpu_debugfs_mqd_fops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fc418e670fda..1d71729e3f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -133,7 +133,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && - amdgpu_bo_in_cpu_visible_vram(abo)) { + amdgpu_res_cpu_visible(adev, bo->resource)) { /* Try evicting to the CPU inaccessible part of VRAM * first, but only set GTT as busy placement, so this @@ -403,40 +403,55 @@ error: return r; } +/** + * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU + * @adev: amdgpu device + * @res: the resource to check + * + * Returns: true if the full resource is CPU visible, false otherwise. + */ +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res) +{ + struct amdgpu_res_cursor cursor; + + if (!res) + return false; + + if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || + res->mem_type == AMDGPU_PL_PREEMPT) + return true; + + if (res->mem_type != TTM_PL_VRAM) + return false; + + amdgpu_res_first(res, 0, res->size, &cursor); + while (cursor.remaining) { + if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size) + return false; + amdgpu_res_next(&cursor, cursor.size); + } + + return true; +} + /* - * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy + * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy * * Called by amdgpu_bo_move() */ -static bool amdgpu_mem_visible(struct amdgpu_device *adev, - struct ttm_resource *mem) +static bool amdgpu_res_copyable(struct amdgpu_device *adev, + struct ttm_resource *mem) { - u64 mem_size = (u64)mem->size; - struct amdgpu_res_cursor cursor; - u64 end; - - if (mem->mem_type == TTM_PL_SYSTEM || - mem->mem_type == TTM_PL_TT) - return true; - if (mem->mem_type != TTM_PL_VRAM) + if (!amdgpu_res_cpu_visible(adev, mem)) return false; - amdgpu_res_first(mem, 0, mem_size, &cursor); - end = cursor.start + cursor.size; - while (cursor.remaining) { - amdgpu_res_next(&cursor, cursor.size); + /* ttm_resource_ioremap only supports contiguous memory */ + if (mem->mem_type == TTM_PL_VRAM && + !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)) + return false; - if (!cursor.remaining) - break; - - /* ttm_resource_ioremap only supports contiguous memory */ - if (end != cursor.start) - return false; - - end = cursor.start + cursor.size; - } - - return end <= adev->gmc.visible_vram_size; + return true; } /* @@ -529,8 +544,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (r) { /* Check that all memory is CPU accessible */ - if (!amdgpu_mem_visible(adev, old_mem) || - !amdgpu_mem_visible(adev, new_mem)) { + if (!amdgpu_res_copyable(adev, old_mem) || + !amdgpu_res_copyable(adev, new_mem)) { pr_err("Move buffer fallback to memcpy unavailable\n"); return r; } @@ -557,7 +572,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - size_t bus_size = (size_t)mem->size; switch (mem->mem_type) { case TTM_PL_SYSTEM: @@ -568,9 +582,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, break; case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; - /* check if it's visible */ - if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) - return -EINVAL; if (adev->mman.aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 65ec82141a8e..32cf6b6f6efd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -139,6 +139,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res); + int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index ab820cf52668..0df97c3e3a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev, mqd->rptr_val = 0; mqd->unmapped = 1; + if (adev->vpe.collaborate_mode) + memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); + qinfo->mqd_addr = test->mqd_data_gpu_addr; qinfo->csa_addr = test->ctx_data_gpu_addr + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_0 = 0; qinfo->doorbell_offset_1 = 0; } @@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te ring[5] = 0; mqd->wptr_val = (6 << 2); - // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + if (adev->vpe.collaborate_mode) + (++mqd)->wptr_val = (6 << 2); + + WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); for (i = 0; i < adev->usec_timeout; i++) { if (*fence == test_pattern) @@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: @@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): umsch_mm_v4_0_set_funcs(&adev->umsch_mm); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h index 8258a43a6236..5014b5af95fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE { UMSCH_SWIP_ENGINE_TYPE_MAX }; -enum UMSCH_SWIP_AFFINITY_TYPE { - UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, - UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, - UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, - UMSCH_SWIP_AFFINITY_TYPE_MAX -}; - enum UMSCH_CONTEXT_PRIORITY_LEVEL { CONTEXT_PRIORITY_LEVEL_IDLE = 0, CONTEXT_PRIORITY_LEVEL_NORMAL = 1, @@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL { struct umsch_mm_set_resource_input { uint32_t vmid_mask_mm_vcn; uint32_t vmid_mask_mm_vpe; + uint32_t collaboration_mask_vpe; uint32_t logging_vmid; uint32_t engine_mask; union { struct { uint32_t disable_reset : 1; uint32_t disable_umsch_mm_log : 1; - uint32_t reserved : 30; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 29; }; uint32_t uint32_all; }; @@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input { uint32_t doorbell_offset_1; enum UMSCH_SWIP_ENGINE_TYPE engine_type; uint32_t affinity; - enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; uint64_t mqd_addr; uint64_t h_context; uint64_t h_queue; uint32_t vm_context_cntl; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; + struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; }; @@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input { uint32_t doorbell_offset_0; uint32_t doorbell_offset_1; uint64_t context_csa_addr; + uint32_t context_csa_array_index; }; struct MQD_INFO { @@ -103,6 +102,7 @@ struct MQD_INFO { uint32_t wptr_val; uint32_t rptr_val; uint32_t unmapped; + uint32_t vmid; }; struct amdgpu_umsch_mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4299ce386322..94089069c9ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1613,6 +1613,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -1639,21 +1670,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -1706,17 +1730,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -1730,7 +1746,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -1817,10 +1833,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 7a65a2b128ec..6695481f870f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -396,6 +396,12 @@ static int vpe_hw_init(void *handle) struct amdgpu_vpe *vpe = &adev->vpe; int ret; + /* Power on VPE */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (ret) + return ret; + ret = vpe_load_microcode(vpe); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index d6f808acfb17..fbb43ae7624f 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; } +static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev) +{ + return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst); +} + static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_idx, struct amdgpu_ring *ring) { @@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_RING_TYPE_VCN_ENC: case AMDGPU_RING_TYPE_VCN_JPEG: ip_blk = AMDGPU_XCP_VCN; - if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + if (aqua_vanjaram_xcp_vcn_shared(adev)) inst_mask = 1 << (inst_idx * 2); break; default: @@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update( aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); - /* VCN is shared by two partitions under CPX MODE */ + /* VCN may be shared by two partitions under CPX MODE in certain + * configs. + */ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && - adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && + aqua_vanjaram_xcp_vcn_shared(adev)) aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 1770e496c1b7..f7325b02a191 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1635,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } @@ -5465,6 +5465,7 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* Make sure that we can't skip the SET_Q_MODE packets when the VM * changed in any way. */ + ring->set_q_mode_offs = 0; ring->set_q_mode_ptr = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 072c478665ad..63f281a9984d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -411,8 +411,11 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = + mes->event_log_gpu_addr; + } return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 34237a1b1f2e..82eab49be82b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1602,19 +1602,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL); - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, - DRAM_ECC_INT_ENABLE, 0); - WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); - break; - /* sdma ecc interrupt is enabled by default - * driver doesn't need to do anything to - * enable the interrupt */ - case AMDGPU_IRQ_STATE_ENABLE: - default: - break; - } + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 581a3bd11481..43ca63fe85ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -457,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): - return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - return false; default: return true; } @@ -722,7 +720,10 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; - adev->external_rev_id = adev->rev_id + 0x1; + if (adev->rev_id == 0) + adev->external_rev_id = 0x1; + else + adev->external_rev_id = adev->rev_id + 0x10; break; case IP_VERSION(11, 5, 1): adev->cg_flags = @@ -869,10 +870,35 @@ static int soc21_common_suspend(void *handle) return soc21_common_hw_fini(adev); } +static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg1, sol_reg2; + + /* Will reset for the following suspend abort cases. + * 1) Only reset dGPU side. + * 2) S3 suspend got aborted and TOS is active. + */ + if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && + !adev->suspend_complete) { + sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + msleep(100); + sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + + return (sol_reg1 != sol_reg2); + } + + return false; +} + static int soc21_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc21_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend aborted, resetting..."); + soc21_asic_reset(adev); + } + return soc21_common_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index 8e7b763cfdb7..bd57896ab85d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch) WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size); + ring->wptr = 0; + data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE); data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK); WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data); @@ -248,7 +250,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -271,6 +273,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.collaboration_mask_vpe = + adev->vpe.collaborate_mode ? 0x3 : 0x0; set_hw_resources.engine_mask = umsch->engine_mask; set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; @@ -346,6 +350,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, add_queue.h_queue = input_ptr->h_queue; add_queue.vm_context_cntl = input_ptr->vm_context_cntl; add_queue.is_context_suspended = input_ptr->is_context_suspended; + add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0; add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index dfa8c69532d4..55aa74cbc532 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -779,8 +779,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, * nodes, but not more than args->num_of_nodes as that is * the amount of memory allocated by user */ - pa = kzalloc((sizeof(struct kfd_process_device_apertures) * - args->num_of_nodes), GFP_KERNEL); + pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures), + GFP_KERNEL); if (!pa) return -ENOMEM; @@ -1523,7 +1523,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, /* Find a KFD GPU device that supports the get_dmabuf_info query */ for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) - if (dev) + if (dev && !kfd_devcgroup_check_permission(dev)) break; if (!dev) return -EINVAL; @@ -1545,7 +1545,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, if (xcp_id >= 0) args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id; else - args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id; + args->gpu_id = dev->id; args->flags = flags; /* Copy metadata buffer to user mode */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 041ec3de55e7..719d6d365e15 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -960,7 +960,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) { struct kfd_node *node; int i; - int count; if (!kfd->init_complete) return; @@ -968,12 +967,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) /* for runtime suspend, skip locking kfd */ if (!run_pm) { mutex_lock(&kfd_processes_mutex); - count = ++kfd_locked; - mutex_unlock(&kfd_processes_mutex); - /* For first KFD device suspend all the KFD processes */ - if (count == 1) + if (++kfd_locked == 1) kfd_suspend_all_processes(); + mutex_unlock(&kfd_processes_mutex); } for (i = 0; i < kfd->num_nodes; i++) { @@ -984,7 +981,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) { - int ret, count, i; + int ret, i; if (!kfd->init_complete) return 0; @@ -998,12 +995,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) /* for runtime resume, skip unlocking kfd */ if (!run_pm) { mutex_lock(&kfd_processes_mutex); - count = --kfd_locked; - mutex_unlock(&kfd_processes_mutex); - - WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); - if (count == 0) + if (--kfd_locked == 0) ret = kfd_resume_all_processes(); + WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error"); + mutex_unlock(&kfd_processes_mutex); } return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f4d395e38683..0b655555e167 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2001,6 +2001,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); while (halt_if_hws_hang) schedule(); + kfd_hws_hang(dqm); return -ETIME; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 9a06c6fb6605..40a21be6c07c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -339,7 +339,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index 7e2859736a55..fe2ad0c0de95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -328,7 +328,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, /* CP */ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_CTXID0_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 91dd5e045b51..c4c6a29052ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -388,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, sq_int_data, 24); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 42d40560cd30..a81ef232fdef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1473,7 +1473,7 @@ static inline void kfd_flush_tlb(struct kfd_process_device *pdd, static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 717a60d7a4ea..b79986412cd8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -819,9 +819,9 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) mutex_lock(&kfd_processes_mutex); if (kfd_is_locked()) { - mutex_unlock(&kfd_processes_mutex); pr_debug("KFD is locked! Cannot create process"); - return ERR_PTR(-EINVAL); + process = ERR_PTR(-EINVAL); + goto out; } /* A prior open of /dev/kfd could have already created the process. */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2851719d7121..6d2f60c61dec 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -148,6 +148,9 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU); #define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin" MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB); +#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin" +MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB); + /* Number of bytes in PSP header for firmware. */ #define PSP_HEADER_BYTES 0x100 @@ -3044,6 +3047,10 @@ static int dm_resume(void *handle) /* Do mst topology probing after resuming cached state*/ drm_connector_list_iter_begin(ddev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type != dc_connection_mst_branch || aconnector->mst_root) @@ -4820,9 +4827,11 @@ static int dm_init_microcode(struct amdgpu_device *adev) fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB; break; case IP_VERSION(3, 5, 0): - case IP_VERSION(3, 5, 1): fw_name_dmub = FIRMWARE_DCN_35_DMUB; break; + case IP_VERSION(3, 5, 1): + fw_name_dmub = FIRMWARE_DCN_351_DMUB; + break; default: /* ASIC doesn't support DMUB. */ return 0; @@ -5921,6 +5930,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, &aconnector->base.probed_modes : &aconnector->base.modes; + if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return NULL; + if (aconnector->freesync_vid_base.clock != 0) return &aconnector->freesync_vid_base; @@ -6305,27 +6317,22 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket); - else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || - stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - stream->signal == SIGNAL_TYPE_EDP) { + + if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || + stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + stream->signal == SIGNAL_TYPE_EDP) { // // should decide stream support vsc sdp colorimetry capability // before building vsc info packet // - stream->use_vsc_sdp_for_colorimetry = false; - if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - stream->use_vsc_sdp_for_colorimetry = - aconnector->dc_sink->is_vsc_sdp_colorimetry_supported; - } else { - if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) - stream->use_vsc_sdp_for_colorimetry = true; - } + stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 && + stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED; + if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) tf = TRANSFER_FUNC_GAMMA_22; mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); + aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; - if (stream->link->psr_settings.psr_feature_enabled) - aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } finish: dc_sink_release(sink); @@ -8764,10 +8771,10 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev, if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) continue; +notify: if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; -notify: aconnector = to_amdgpu_dm_connector(connector); mutex_lock(&adev->dm.audio_lock); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 1f08c6564c3b..286ecd28cc6e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -141,9 +141,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) * amdgpu_dm_psr_enable() - enable psr f/w * @stream: stream state * - * Return: true if success */ -bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) +void amdgpu_dm_psr_enable(struct dc_stream_state *stream) { struct dc_link *link = stream->link; unsigned int vsync_rate_hz = 0; @@ -190,7 +189,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1) power_opt |= psr_power_opt_z10_static_screen; - return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt); + dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt); + + if (link->ctx->dc->caps.ips_support) + dc_allow_idle_optimizations(link->ctx->dc, true); } /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h index 6806b3c9c84b..1fdfd183c0d9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h @@ -32,7 +32,7 @@ #define AMDGPU_DM_PSR_ENTRY_DELAY 5 void amdgpu_dm_set_psr_caps(struct dc_link *link); -bool amdgpu_dm_psr_enable(struct dc_stream_state *stream); +void amdgpu_dm_psr_enable(struct dc_stream_state *stream); bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index 16e72d623630..08c494a7a21b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -76,10 +76,8 @@ static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder, static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector) { - struct drm_device *dev = connector->dev; - - return drm_add_modes_noedid(connector, dev->mode_config.max_width, - dev->mode_config.max_height); + /* Maximum resolution supported by DWB */ + return drm_add_modes_noedid(connector, 3840, 2160); } static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 12f3e8aa46d8..6ad4f4efec5d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -99,20 +99,25 @@ static int dcn316_get_active_display_cnt_wa( return display_count; } -static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || - dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || + !pipe->stream->link_enc)) { if (disable) { - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); + reset_sync_context_for_pipe(dc, context, i); } else pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); @@ -207,11 +212,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn316_disable_otg_wa(clk_mgr_base, context, true); + dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn316_disable_otg_wa(clk_mgr_base, context, false); + dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index c378b879c76d..d9c5692c86c2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -73,6 +73,14 @@ #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +#define regCLK5_0_CLK5_spll_field_8 0x464b +#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0 + +#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd +#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L + +#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 + #define REG(reg_name) \ (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -409,11 +417,25 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs { } +static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc_context *ctx = clk_mgr->base.ctx; + uint32_t ssc_enable; + + REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable); + + return ssc_enable == 1; +} + static void init_clk_states(struct clk_mgr *clk_mgr) { + struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); + if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD) + clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; @@ -423,7 +445,16 @@ static void init_clk_states(struct clk_mgr *clk_mgr) void dcn35_init_clocks(struct clk_mgr *clk_mgr) { + struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); init_clk_states(clk_mgr); + + // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk + if (dcn35_is_spll_ssc_enabled(clk_mgr)) + clk_mgr->dp_dto_source_clock_in_khz = + dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz); + else + clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; + } static struct clk_bw_params dcn35_bw_params = { .vram_type = Ddr4MemType, @@ -512,6 +543,28 @@ static DpmClocks_t_dcn35 dummy_clocks; static struct dcn35_watermarks dummy_wms = { 0 }; +static struct dcn35_ss_info_table ss_info_table = { + .ss_divider = 1000, + .ss_percentage = {0, 0, 375, 375, 375} +}; + +static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) +{ + struct dc_context *ctx = clk_mgr->base.ctx; + uint32_t clock_source; + + REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); + // If it's DFS mode, clock_source is 0. + if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) { + clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; + + if (clk_mgr->dprefclk_ss_percentage != 0) { + clk_mgr->ss_on_dprefclk = true; + clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider; + } + } +} + static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table) { int i, num_valid_sets; @@ -709,7 +762,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk clock_table->NumFclkLevelsEnabled; max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk); - num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS : + num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS : clock_table->NumDcfClkLevelsEnabled; for (i = 0; i < num_dcfclk; i++) { int j; @@ -1056,6 +1109,8 @@ void dcn35_clk_mgr_construct( dce_clock_read_ss_info(&clk_mgr->base); /*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/ + dcn35_read_ss_info_from_lut(&clk_mgr->base); + clk_mgr->base.base.bw_params = &dcn35_bw_params; if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e7dc128f6284..03b554e912a2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3024,7 +3024,8 @@ static void backup_planes_and_stream_state( scratch->blend_tf[i] = *status->plane_states[i]->blend_tf; } scratch->stream_state = *stream; - scratch->out_transfer_func = *stream->out_transfer_func; + if (stream->out_transfer_func) + scratch->out_transfer_func = *stream->out_transfer_func; } static void restore_planes_and_stream_state( @@ -3046,7 +3047,8 @@ static void restore_planes_and_stream_state( *status->plane_states[i]->blend_tf = scratch->blend_tf[i]; } *stream = scratch->stream_state; - *stream->out_transfer_func = scratch->out_transfer_func; + if (stream->out_transfer_func) + *stream->out_transfer_func = scratch->out_transfer_func; } static bool update_planes_and_stream_state(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 5cc7f8da209c..61986e5cb491 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -436,6 +436,15 @@ bool dc_state_add_plane( goto out; } + if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm) + /* ODM combine could prevent us from supporting more planes + * we will reset ODM slice count back to 1 when all planes have + * been removed to maximize the amount of planes supported when + * new planes are added. + */ + resource_update_pipes_for_stream_with_slice_count( + state, dc->current_state, dc->res_pool, stream, 1); + otg_master_pipe = resource_get_otg_master_for_stream( &state->res_ctx, stream); if (otg_master_pipe) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 970644b695cd..b5e0289d2fe8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -976,7 +976,10 @@ static bool dcn31_program_pix_clk( struct bp_pixel_clock_parameters bp_pc_params = {0}; enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; - if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0) + // Apply ssed(spread spectrum) dpref clock for edp only. + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 + && pix_clk_params->signal_type == SIGNAL_TYPE_EDP + && encoding == DP_8b_10b_ENCODING) dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; // For these signal types Driver to program DP_DTO without calling VBIOS Command table if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { @@ -1093,9 +1096,6 @@ static bool get_pixel_clk_frequency_100hz( unsigned int modulo_hz = 0; unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz; - if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0) - dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; - if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) { clock_hz = REG_READ(PHASE[inst]); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile index f0777d61c2cb..c307f040e48f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. -CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = -Wno-override-init DCE110 = dce110_timing_generator.o \ dce110_compressor.o dce110_opp_regamma_v.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile index 7e92effec894..683866797709 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. -CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = -Wno-override-init DCE112 = dce112_compressor.o diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile index 1e3ef68a452a..8f508e662748 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile @@ -24,7 +24,7 @@ # It provides the control and status of HW CRTC block. -CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = -Wno-override-init DCE120 = dce120_timing_generator.o diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile index fee331accc0e..eede83ad91fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. -CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \ dce60_resource.o diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile index 7eefffbdc925..fba189d26652 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. -CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = -Wno-override-init DCE80 = dce80_timing_generator.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c index bf3386cd444d..5ebb57303130 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c @@ -44,6 +44,36 @@ #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) +void mpc3_mpc_init(struct mpc *mpc) +{ + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + int opp_id; + + mpc1_mpc_init(mpc); + + for (opp_id = 0; opp_id < MAX_OPP; opp_id++) { + if (REG(MUX[opp_id])) + /* disable mpc out rate and flow control */ + REG_UPDATE_2(MUX[opp_id], MPC_OUT_RATE_CONTROL_DISABLE, + 1, MPC_OUT_FLOW_CONTROL_COUNT, 0); + } +} + +void mpc3_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id) +{ + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + + mpc1_mpc_init_single_inst(mpc, mpcc_id); + + /* assuming mpc out mux is connected to opp with the same index at this + * point in time (e.g. transitioning from vbios to driver) + */ + if (mpcc_id < MAX_OPP && REG(MUX[mpcc_id])) + /* disable mpc out rate and flow control */ + REG_UPDATE_2(MUX[mpcc_id], MPC_OUT_RATE_CONTROL_DISABLE, + 1, MPC_OUT_FLOW_CONTROL_COUNT, 0); +} + bool mpc3_is_dwb_idle( struct mpc *mpc, int dwb_id) @@ -80,25 +110,6 @@ void mpc3_disable_dwb_mux( MPC_DWB0_MUX, 0xf); } -void mpc3_set_out_rate_control( - struct mpc *mpc, - int opp_id, - bool enable, - bool rate_2x_mode, - struct mpc_dwb_flow_control *flow_control) -{ - struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); - - REG_UPDATE_2(MUX[opp_id], - MPC_OUT_RATE_CONTROL_DISABLE, !enable, - MPC_OUT_RATE_CONTROL, rate_2x_mode); - - if (flow_control) - REG_UPDATE_2(MUX[opp_id], - MPC_OUT_FLOW_CONTROL_MODE, flow_control->flow_ctrl_mode, - MPC_OUT_FLOW_CONTROL_COUNT, flow_control->flow_ctrl_cnt1); -} - enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id) { /*Contrary to DCN2 and DCN1 wherein a single status register field holds this info; @@ -1490,8 +1501,8 @@ static const struct mpc_funcs dcn30_mpc_funcs = { .read_mpcc_state = mpc3_read_mpcc_state, .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, - .mpc_init = mpc1_mpc_init, - .mpc_init_single_inst = mpc1_mpc_init_single_inst, + .mpc_init = mpc3_mpc_init, + .mpc_init_single_inst = mpc3_mpc_init_single_inst, .update_blending = mpc2_update_blending, .cursor_lock = mpc1_cursor_lock, .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp, @@ -1508,7 +1519,6 @@ static const struct mpc_funcs dcn30_mpc_funcs = { .set_dwb_mux = mpc3_set_dwb_mux, .disable_dwb_mux = mpc3_disable_dwb_mux, .is_dwb_idle = mpc3_is_dwb_idle, - .set_out_rate_control = mpc3_set_out_rate_control, .set_gamut_remap = mpc3_set_gamut_remap, .program_shaper = mpc3_program_shaper, .acquire_rmu = mpcc3_acquire_rmu, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h index 9cb96ae95a2f..ce93003dae01 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h @@ -1007,6 +1007,13 @@ void dcn30_mpc_construct(struct dcn30_mpc *mpc30, int num_mpcc, int num_rmu); +void mpc3_mpc_init( + struct mpc *mpc); + +void mpc3_mpc_init_single_inst( + struct mpc *mpc, + unsigned int mpcc_id); + bool mpc3_program_shaper( struct mpc *mpc, const struct pwl_params *params, @@ -1078,13 +1085,6 @@ bool mpc3_is_dwb_idle( struct mpc *mpc, int dwb_id); -void mpc3_set_out_rate_control( - struct mpc *mpc, - int opp_id, - bool enable, - bool rate_2x_mode, - struct mpc_dwb_flow_control *flow_control); - void mpc3_power_on_ogam_lut( struct mpc *mpc, int mpcc_id, bool power_on); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c index e224a028d68a..8a0460e86309 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c @@ -248,14 +248,12 @@ void dcn32_link_encoder_construct( enc10->base.hpd_source = init_data->hpd_source; enc10->base.connector = init_data->connector; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; + enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; enc10->base.features = *enc_features; - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; - - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.transmitter = init_data->transmitter; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index e789e654c387..e408e859b355 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -47,7 +47,7 @@ void mpc32_mpc_init(struct mpc *mpc) struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); int mpcc_id; - mpc1_mpc_init(mpc); + mpc3_mpc_init(mpc); if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) { if (mpc30->mpc_mask->MPCC_MCM_SHAPER_MEM_LOW_PWR_MODE && mpc30->mpc_mask->MPCC_MCM_3DLUT_MEM_LOW_PWR_MODE) { @@ -991,7 +991,7 @@ static const struct mpc_funcs dcn32_mpc_funcs = { .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, .mpc_init = mpc32_mpc_init, - .mpc_init_single_inst = mpc1_mpc_init_single_inst, + .mpc_init_single_inst = mpc3_mpc_init_single_inst, .update_blending = mpc2_update_blending, .cursor_lock = mpc1_cursor_lock, .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp, @@ -1008,7 +1008,6 @@ static const struct mpc_funcs dcn32_mpc_funcs = { .set_dwb_mux = mpc3_set_dwb_mux, .disable_dwb_mux = mpc3_disable_dwb_mux, .is_dwb_idle = mpc3_is_dwb_idle, - .set_out_rate_control = mpc3_set_out_rate_control, .set_gamut_remap = mpc3_set_gamut_remap, .program_shaper = mpc32_program_shaper, .program_3dlut = mpc32_program_3dlut, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c index 81e349d5835b..da94e5309fba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c @@ -184,6 +184,8 @@ void dcn35_link_encoder_construct( enc10->base.hpd_source = init_data->hpd_source; enc10->base.connector = init_data->connector; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; @@ -238,8 +240,6 @@ void dcn35_link_encoder_construct( } enc10->base.features.flags.bits.HDMI_6GB_EN = 1; - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; if (bp_funcs->get_connector_speed_cap_info) result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 80bebfc268db..21e0eef3269b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -166,8 +166,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .num_states = 5, .sr_exit_time_us = 28.0, .sr_enter_plus_exit_time_us = 30.0, - .sr_exit_z8_time_us = 210.0, - .sr_enter_plus_exit_z8_time_us = 320.0, + .sr_exit_z8_time_us = 250.0, + .sr_enter_plus_exit_z8_time_us = 350.0, .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index dc9e1b758ed6..b3ffab77cf88 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -98,55 +98,114 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .clock_limits = { { .state = 0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 400.0, + .fabricclk_mhz = 400.0, + .socclk_mhz = 600.0, + .dram_speed_mts = 3200.0, + .dispclk_mhz = 600.0, + .dppclk_mhz = 600.0, .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 186.0, + .dscclk_mhz = 200.0, .dtbclk_mhz = 600.0, }, { .state = 1, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 600.0, + .fabricclk_mhz = 1000.0, + .socclk_mhz = 733.0, + .dram_speed_mts = 6400.0, + .dispclk_mhz = 800.0, + .dppclk_mhz = 800.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, + .dscclk_mhz = 266.7, .dtbclk_mhz = 600.0, }, { .state = 2, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 738.0, + .fabricclk_mhz = 1200.0, + .socclk_mhz = 880.0, + .dram_speed_mts = 7500.0, + .dispclk_mhz = 800.0, + .dppclk_mhz = 800.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, + .dscclk_mhz = 266.7, .dtbclk_mhz = 600.0, }, { .state = 3, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 800.0, + .fabricclk_mhz = 1400.0, + .socclk_mhz = 978.0, + .dram_speed_mts = 7500.0, + .dispclk_mhz = 960.0, + .dppclk_mhz = 960.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, + .dscclk_mhz = 320.0, .dtbclk_mhz = 600.0, }, { .state = 4, + .dcfclk_mhz = 873.0, + .fabricclk_mhz = 1600.0, + .socclk_mhz = 1100.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1066.7, + .dppclk_mhz = 1066.7, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 355.6, + .dtbclk_mhz = 600.0, + }, + { + .state = 5, + .dcfclk_mhz = 960.0, + .fabricclk_mhz = 1700.0, + .socclk_mhz = 1257.0, + .dram_speed_mts = 8533.0, .dispclk_mhz = 1200.0, .dppclk_mhz = 1200.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, + .dscclk_mhz = 400.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 6, + .dcfclk_mhz = 1067.0, + .fabricclk_mhz = 1850.0, + .socclk_mhz = 1257.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1371.4, + .dppclk_mhz = 1371.4, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 457.1, + .dtbclk_mhz = 600.0, + }, + { + .state = 7, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 2000.0, + .socclk_mhz = 1467.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1600.0, + .dppclk_mhz = 1600.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 533.3, .dtbclk_mhz = 600.0, }, }, - .num_states = 5, + .num_states = 8, .sr_exit_time_us = 28.0, .sr_enter_plus_exit_time_us = 30.0, - .sr_exit_z8_time_us = 210.0, - .sr_enter_plus_exit_z8_time_us = 320.0, + .sr_exit_z8_time_us = 250.0, + .sr_enter_plus_exit_z8_time_us = 350.0, .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, @@ -177,6 +236,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .do_urgent_latency_adjustment = 0, .urgent_latency_adjustment_fabric_clock_component_us = 0, .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .num_chans = 4, + .dram_clock_change_latency_us = 11.72, + .dispclk_dppclk_vco_speed_mhz = 2400.0, }; /* @@ -340,6 +402,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = @@ -352,6 +416,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc, clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + clk_table->num_entries; } } @@ -551,6 +617,7 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context) if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; } + /*dcn351 does not support z9/z10*/ if (context->stream_count == 0 || plane_count == 0) { support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; @@ -564,11 +631,9 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context) dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; - /*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/ - if (is_pwrseq0 && (is_psr || is_replay)) + if (is_pwrseq0 && (is_psr || is_replay)) support = allow_z8 ? allow_z8 : DCN_ZSTATE_SUPPORT_DISALLOW; - } context->bw_ctx.bw.dcn.clk.zstate_support = support; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 17a58f41fc6a..a20f28a5d2e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -228,17 +228,13 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s break; case dml_project_dcn35: + case dml_project_dcn351: out->num_chans = 4; out->round_trip_ping_latency_dcfclk_cycles = 106; out->smn_latency_us = 2; out->dispclk_dppclk_vco_speed_mhz = 3600; break; - case dml_project_dcn351: - out->num_chans = 16; - out->round_trip_ping_latency_dcfclk_cycles = 1100; - out->smn_latency_us = 2; - break; } /* ---Overrides if available--- */ if (dml2->config.bbox_overrides.dram_num_chan) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 9d5df4c0da59..0ba1feaf96c0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1185,7 +1185,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) if (dccg) { dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + if (dccg && dccg->funcs->set_dtbclk_dto) + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 3a9cc8ac0c07..093f4387553c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -69,29 +69,6 @@ #define FN(reg_name, field_name) \ hws->shifts->field_name, hws->masks->field_name -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -183,10 +160,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -199,20 +172,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if (mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index c0b526cf1786..7668229438da 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -966,29 +966,6 @@ void dcn32_init_hw(struct dc *dc) } } -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -1103,10 +1080,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -1119,20 +1092,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if (mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 4b92df23ff0d..a5560b3fc39b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -358,29 +358,6 @@ void dcn35_init_hw(struct dc *dc) } } -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -474,10 +451,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -490,20 +463,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if (mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index ab17fa1c64e8..670255c9bc82 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -67,7 +67,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .prepare_bandwidth = dcn35_prepare_bandwidth, .optimize_bandwidth = dcn35_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn35_set_drr, .get_position = dcn10_get_position, .set_static_screen_control = dcn35_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c index f07a4c7e48bc..52eab8fccb7f 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -267,9 +267,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc) OTG_V_TOTAL_MAX_SEL, 1, OTG_FORCE_LOCK_ON_EVENT, 0, OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ - - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); } } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 5b486400dfdb..909e14261f9b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -700,6 +700,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .disable_dpp_power_gate = true, .disable_hubp_power_gate = true, + .disable_optc_power_gate = true, /*should the same as above two*/ + .disable_hpo_power_gate = true, /*dmubfw force domain25 on*/ .disable_clock_gate = false, .disable_dsc_power_gate = true, .vsr_support = true, @@ -742,12 +744,13 @@ static const struct dc_debug_options debug_defaults_drv = { }, .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT, .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ + .minimum_z8_residency_time = 2100, .using_dml2 = true, .support_eDP1_5 = true, .enable_hpo_pg_support = false, .enable_legacy_fast_update = true, .enable_single_display_2to1_odm_policy = true, - .disable_idle_power_optimizations = true, + .disable_idle_power_optimizations = false, .dmcub_emulation = false, .disable_boot_optimizations = false, .disable_unbounded_requesting = false, @@ -758,8 +761,10 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = true, .ignore_pg = true, .psp_disabled_wa = true, - .ips2_eval_delay_us = 200, - .ips2_entry_delay_us = 400 + .ips2_eval_delay_us = 2000, + .ips2_entry_delay_us = 800, + .disable_dmub_reallow_idle = true, + .static_screen_wait_frames = 2, }; static const struct dc_panel_config panel_config_defaults = { diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 738ee763f24a..84f9b412a4f1 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -147,15 +147,12 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } /* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */ - if (stream->link->psr_settings.psr_feature_enabled) { - if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) - vsc_packet_revision = vsc_packet_rev4; - else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) - vsc_packet_revision = vsc_packet_rev2; - } - - if (stream->link->replay_settings.config.replay_supported) + if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) vsc_packet_revision = vsc_packet_rev4; + else if (stream->link->replay_settings.config.replay_supported) + vsc_packet_revision = vsc_packet_rev4; + else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) + vsc_packet_revision = vsc_packet_rev2; /* Update to revision 5 for extended colorimetry support */ if (stream->use_vsc_sdp_for_colorimetry) diff --git a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h index beadb9e42850..ca83e9e5c3ff 100644 --- a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h +++ b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h @@ -234,7 +234,8 @@ union UMSCHAPI__SET_HW_RESOURCES { uint32_t enable_level_process_quantum_check : 1; uint32_t is_vcn0_enabled : 1; uint32_t is_vcn1_enabled : 1; - uint32_t reserved : 27; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 26; }; uint32_t uint32_all; }; @@ -297,9 +298,12 @@ union UMSCHAPI__ADD_QUEUE { struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; struct UMSCH_API_STATUS api_status; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -314,6 +318,7 @@ union UMSCHAPI__REMOVE_QUEUE { uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -337,6 +342,7 @@ union UMSCHAPI__SUSPEND { uint32_t suspend_fence_value; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -356,6 +362,7 @@ union UMSCHAPI__RESUME { enum UMSCH_ENGINE_TYPE engine_type; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -404,6 +411,7 @@ union UMSCHAPI__UPDATE_AFFINITY { union UMSCH_AFFINITY affinity; uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -417,6 +425,7 @@ union UMSCHAPI__CHANGE_CONTEXT_PRIORITY_LEVEL { uint64_t context_quantum; uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 246b211b1e85..65333141b1c1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -735,7 +735,7 @@ static int smu_early_init(void *handle) smu->adev = adev; smu->pm_enabled = !!amdgpu_dpm; smu->is_apu = false; - smu->smu_baco.state = SMU_BACO_STATE_EXIT; + smu->smu_baco.state = SMU_BACO_STATE_NONE; smu->smu_baco.platform_support = false; smu->user_dpm_profile.fan_mode = -1; @@ -1966,10 +1966,25 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) return 0; } +static int smu_reset_mp1_state(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + if ((!adev->in_runpm) && (!adev->in_suspend) && + (!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 10) && + !amdgpu_device_has_display_hardware(adev)) + ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); + + return ret; +} + static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1987,7 +2002,15 @@ static int smu_hw_fini(void *handle) adev->pm.dpm_enabled = false; - return smu_smc_hw_cleanup(smu); + ret = smu_smc_hw_cleanup(smu); + if (ret) + return ret; + + ret = smu_reset_mp1_state(smu); + if (ret) + return ret; + + return 0; } static void smu_late_fini(void *handle) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index a870bdd49a4e..1fa81575788c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -424,6 +424,7 @@ enum smu_reset_mode { enum smu_baco_state { SMU_BACO_STATE_ENTER = 0, SMU_BACO_STATE_EXIT, + SMU_BACO_STATE_NONE, }; struct smu_baco_context { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index 5bb7a63c0602..97522c085258 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -144,6 +144,37 @@ typedef struct { uint32_t MaxGfxClk; } DpmClocks_t; +//Freq in MHz +//Voltage in milli volts with 2 fractional bits +typedef struct { + uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS]; + uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS]; + uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS]; + uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS]; + uint32_t VClocks0[NUM_VCN_DPM_LEVELS]; + uint32_t VClocks1[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks0[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks1[NUM_VCN_DPM_LEVELS]; + uint32_t VPEClocks[NUM_VPE_DPM_LEVELS]; + uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS]; + uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS]; + uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS]; + MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS]; + + uint8_t NumDcfClkLevelsEnabled; + uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk + uint8_t NumSocClkLevelsEnabled; + uint8_t Vcn0ClkLevelsEnabled; //Applies to both Vclk0 and Dclk0 + uint8_t Vcn1ClkLevelsEnabled; //Applies to both Vclk1 and Dclk1 + uint8_t VpeClkLevelsEnabled; + uint8_t NumMemPstatesEnabled; + uint8_t NumFclkLevelsEnabled; + uint8_t spare; + + uint32_t MinGfxClk; + uint32_t MaxGfxClk; +} DpmClocks_t_v14_0_1; + typedef struct { uint16_t CoreFrequency[16]; //Target core frequency [MHz] uint16_t CorePower[16]; //CAC calculated core power [mW] @@ -224,7 +255,7 @@ typedef enum { #define TABLE_CUSTOM_DPM 2 // Called by Driver #define TABLE_BIOS_GPIO_CONFIG 3 // Called by BIOS #define TABLE_DPMCLOCKS 4 // Called by Driver and VBIOS -#define TABLE_SPARE0 5 // Unused +#define TABLE_MOMENTARY_PM 5 // Called by Tools #define TABLE_MODERN_STDBY 6 // Called by Tools for Modern Standby Log #define TABLE_SMU_METRICS 7 // Called by Driver and SMF/PMF #define TABLE_COUNT 8 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h index 356e0f57a426..ddb625860083 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h @@ -42,7 +42,7 @@ #define FEATURE_EDC_BIT 7 #define FEATURE_PLL_POWER_DOWN_BIT 8 #define FEATURE_VDDOFF_BIT 9 -#define FEATURE_VCN_DPM_BIT 10 +#define FEATURE_VCN_DPM_BIT 10 /* this is for both VCN0 and VCN1 */ #define FEATURE_DS_MPM_BIT 11 #define FEATURE_FCLK_DPM_BIT 12 #define FEATURE_SOCCLK_DPM_BIT 13 @@ -56,9 +56,9 @@ #define FEATURE_DS_GFXCLK_BIT 21 #define FEATURE_DS_SOCCLK_BIT 22 #define FEATURE_DS_LCLK_BIT 23 -#define FEATURE_LOW_POWER_DCNCLKS_BIT 24 // for all DISP clks +#define FEATURE_LOW_POWER_DCNCLKS_BIT 24 #define FEATURE_DS_SHUBCLK_BIT 25 -#define FEATURE_SPARE0_BIT 26 //SPARE +#define FEATURE_RESERVED0_BIT 26 #define FEATURE_ZSTATES_BIT 27 #define FEATURE_IOMMUL2_PG_BIT 28 #define FEATURE_DS_FCLK_BIT 29 @@ -66,8 +66,8 @@ #define FEATURE_DS_MP1CLK_BIT 31 #define FEATURE_WHISPER_MODE_BIT 32 #define FEATURE_SMU_LOW_POWER_BIT 33 -#define FEATURE_SMART_L3_RINSER_BIT 34 -#define FEATURE_SPARE1_BIT 35 //SPARE +#define FEATURE_RESERVED1_BIT 34 /* v14_0_0 SMART_L3_RINSER; v14_0_1 RESERVED1 */ +#define FEATURE_GFX_DEM_BIT 35 /* v14_0_0 SPARE; v14_0_1 GFX_DEM */ #define FEATURE_PSI_BIT 36 #define FEATURE_PROCHOT_BIT 37 #define FEATURE_CPUOFF_BIT 38 @@ -77,11 +77,11 @@ #define FEATURE_PERF_LIMIT_BIT 42 #define FEATURE_CORE_DLDO_BIT 43 #define FEATURE_DVO_BIT 44 -#define FEATURE_DS_VCN_BIT 45 +#define FEATURE_DS_VCN_BIT 45 /* v14_0_1 this is for both VCN0 and VCN1 */ #define FEATURE_CPPC_BIT 46 #define FEATURE_CPPC_PREFERRED_CORES 47 #define FEATURE_DF_CSTATES_BIT 48 -#define FEATURE_SPARE2_BIT 49 //SPARE +#define FEATURE_FAST_PSTATE_CLDO_BIT 49 /* v14_0_0 SPARE */ #define FEATURE_ATHUB_PG_BIT 50 #define FEATURE_VDDOFF_ECO_BIT 51 #define FEATURE_ZSTATES_ECO_BIT 52 @@ -93,8 +93,8 @@ #define FEATURE_DS_IPUCLK_BIT 58 #define FEATURE_DS_VPECLK_BIT 59 #define FEATURE_VPE_DPM_BIT 60 -#define FEATURE_SPARE_61 61 -#define FEATURE_FP_DIDT 62 +#define FEATURE_SMART_L3_RINSER_BIT 61 /* v14_0_0 SPARE*/ +#define FEATURE_PCC_BIT 62 /* v14_0_0 FP_DIDT v14_0_1 PCC_BIT */ #define NUM_FEATURES 63 // Firmware Header/Footer @@ -151,6 +151,43 @@ typedef struct { // MP1_EXT_SCRATCH7 = RTOS Current Job } FwStatus_t; +typedef struct { + // MP1_EXT_SCRATCH0 + uint32_t DpmHandlerID : 8; + uint32_t ActivityMonitorID : 8; + uint32_t DpmTimerID : 8; + uint32_t DpmHubID : 4; + uint32_t DpmHubTask : 4; + // MP1_EXT_SCRATCH1 + uint32_t CclkSyncStatus : 8; + uint32_t ZstateStatus : 4; + uint32_t Cpu1VddOff : 4; + uint32_t DstateFun : 4; + uint32_t DstateDev : 4; + uint32_t GfxOffStatus : 2; + uint32_t Cpu0Off : 2; + uint32_t Cpu1Off : 2; + uint32_t Cpu0VddOff : 2; + // MP1_EXT_SCRATCH2 + uint32_t P2JobHandler :32; + // MP1_EXT_SCRATCH3 + uint32_t PostCode :32; + // MP1_EXT_SCRATCH4 + uint32_t MsgPortBusy :15; + uint32_t RsmuPmiP1Pending : 1; + uint32_t RsmuPmiP2PendingCnt : 8; + uint32_t DfCstateExitPending : 1; + uint32_t Pc6EntryPending : 1; + uint32_t Pc6ExitPending : 1; + uint32_t WarmResetPending : 1; + uint32_t Mp0ClkPending : 1; + uint32_t InWhisperMode : 1; + uint32_t spare2 : 2; + // MP1_EXT_SCRATCH5 + uint32_t IdleMask :32; + // MP1_EXT_SCRATCH6 = RTOS threads' status + // MP1_EXT_SCRATCH7 = RTOS Current Job +} FwStatus_t_v14_0_1; #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h index 8a8a57c56bc0..c4dc5881d8df 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h @@ -54,14 +54,14 @@ #define PPSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team #define PPSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version #define PPSMC_MSG_GetDriverIfVersion 0x03 ///< Get PMFW_DRIVER_IF version -#define PPSMC_MSG_SPARE0 0x04 ///< SPARE -#define PPSMC_MSG_SPARE1 0x05 ///< SPARE -#define PPSMC_MSG_PowerDownVcn 0x06 ///< Power down VCN -#define PPSMC_MSG_PowerUpVcn 0x07 ///< Power up VCN; VCN is power gated by default -#define PPSMC_MSG_SetHardMinVcn 0x08 ///< For wireless display +#define PPSMC_MSG_PowerDownVcn1 0x04 ///< Power down VCN1 +#define PPSMC_MSG_PowerUpVcn1 0x05 ///< Power up VCN1; VCN1 is power gated by default +#define PPSMC_MSG_PowerDownVcn0 0x06 ///< Power down VCN0 +#define PPSMC_MSG_PowerUpVcn0 0x07 ///< Power up VCN0; VCN0 is power gated by default +#define PPSMC_MSG_SetHardMinVcn0 0x08 ///< For wireless display #define PPSMC_MSG_SetSoftMinGfxclk 0x09 ///< Set SoftMin for GFXCLK, argument is frequency in MHz -#define PPSMC_MSG_SPARE2 0x0A ///< SPARE -#define PPSMC_MSG_SPARE3 0x0B ///< SPARE +#define PPSMC_MSG_SetHardMinVcn1 0x0A ///< For wireless display +#define PPSMC_MSG_SetSoftMinVcn1 0x0B ///< Set soft min for VCN1 clocks (VCLK1 and DCLK1) #define PPSMC_MSG_PrepareMp1ForUnload 0x0C ///< Prepare PMFW for GFX driver unload #define PPSMC_MSG_SetDriverDramAddrHigh 0x0D ///< Set high 32 bits of DRAM address for Driver table transfer #define PPSMC_MSG_SetDriverDramAddrLow 0x0E ///< Set low 32 bits of DRAM address for Driver table transfer @@ -71,36 +71,32 @@ #define PPSMC_MSG_GetEnabledSmuFeatures 0x12 ///< Get enabled features in PMFW #define PPSMC_MSG_SetHardMinSocclkByFreq 0x13 ///< Set hard min for SOC CLK #define PPSMC_MSG_SetSoftMinFclk 0x14 ///< Set hard min for FCLK -#define PPSMC_MSG_SetSoftMinVcn 0x15 ///< Set soft min for VCN clocks (VCLK and DCLK) - +#define PPSMC_MSG_SetSoftMinVcn0 0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0) #define PPSMC_MSG_EnableGfxImu 0x16 ///< Enable GFX IMU - -#define PPSMC_MSG_spare_0x17 0x17 -#define PPSMC_MSG_spare_0x18 0x18 +#define PPSMC_MSG_spare_0x17 0x17 ///< Get GFX clock frequency +#define PPSMC_MSG_spare_0x18 0x18 ///< Get FCLK frequency #define PPSMC_MSG_AllowGfxOff 0x19 ///< Inform PMFW of allowing GFXOFF entry #define PPSMC_MSG_DisallowGfxOff 0x1A ///< Inform PMFW of disallowing GFXOFF entry #define PPSMC_MSG_SetSoftMaxGfxClk 0x1B ///< Set soft max for GFX CLK #define PPSMC_MSG_SetHardMinGfxClk 0x1C ///< Set hard min for GFX CLK - #define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x1D ///< Set soft max for SOC CLK #define PPSMC_MSG_SetSoftMaxFclkByFreq 0x1E ///< Set soft max for FCLK -#define PPSMC_MSG_SetSoftMaxVcn 0x1F ///< Set soft max for VCN clocks (VCLK and DCLK) -#define PPSMC_MSG_spare_0x20 0x20 -#define PPSMC_MSG_PowerDownJpeg 0x21 ///< Power down Jpeg -#define PPSMC_MSG_PowerUpJpeg 0x22 ///< Power up Jpeg; VCN is power gated by default - +#define PPSMC_MSG_SetSoftMaxVcn0 0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0) +#define PPSMC_MSG_spare_0x20 0x20 ///< Set power limit percentage +#define PPSMC_MSG_PowerDownJpeg0 0x21 ///< Power down Jpeg of VCN0 +#define PPSMC_MSG_PowerUpJpeg0 0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default #define PPSMC_MSG_SetHardMinFclkByFreq 0x23 ///< Set hard min for FCLK #define PPSMC_MSG_SetSoftMinSocclkByFreq 0x24 ///< Set soft min for SOC CLK #define PPSMC_MSG_AllowZstates 0x25 ///< Inform PMFM of allowing Zstate entry, i.e. no Miracast activity -#define PPSMC_MSG_Reserved 0x26 ///< Not used -#define PPSMC_MSG_Reserved1 0x27 ///< Not used, previously PPSMC_MSG_RequestActiveWgp -#define PPSMC_MSG_Reserved2 0x28 ///< Not used, previously PPSMC_MSG_QueryActiveWgp +#define PPSMC_MSG_PowerDownJpeg1 0x26 ///< Power down Jpeg of VCN1 +#define PPSMC_MSG_PowerUpJpeg1 0x27 ///< Power up Jpeg of VCN1; VCN1 is power gated by default +#define PPSMC_MSG_SetSoftMaxVcn1 0x28 ///< Set soft max for VCN1 clocks (VCLK1 and DCLK1) #define PPSMC_MSG_PowerDownIspByTile 0x29 ///< ISP is power gated by default #define PPSMC_MSG_PowerUpIspByTile 0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM #define PPSMC_MSG_SetHardMinIspiclkByFreq 0x2B ///< Set HardMin by frequency for ISPICLK #define PPSMC_MSG_SetHardMinIspxclkByFreq 0x2C ///< Set HardMin by frequency for ISPXCLK -#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN.UMSCH (aka VSCH) scheduler -#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN.UMSCH (aka VSCH) scheduler +#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN0.UMSCH (aka VSCH) scheduler +#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN0.UMSCH (aka VSCH) scheduler #define PPSMC_Message_IspStutterOn_MmhubPgDis 0x2F ///< ISP StutterOn mmHub PgDis #define PPSMC_Message_IspStutterOff_MmhubPgEn 0x30 ///< ISP StufferOff mmHub PgEn #define PPSMC_MSG_PowerUpVpe 0x31 ///< Power up VPE @@ -110,7 +106,9 @@ #define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA #define PPSMC_MSG_SetSoftMaxVpe 0x36 ///< #define PPSMC_MSG_SetSoftMinVpe 0x37 ///< -#define PPSMC_Message_Count 0x38 ///< Total number of PPSMC messages +#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache +#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache +#define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages /** @}*/ /** diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index a941fdbf78b6..af427cc7dbb8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -115,6 +115,10 @@ __SMU_DUMMY_MAP(PowerDownVcn), \ __SMU_DUMMY_MAP(PowerUpJpeg), \ __SMU_DUMMY_MAP(PowerDownJpeg), \ + __SMU_DUMMY_MAP(PowerUpJpeg0), \ + __SMU_DUMMY_MAP(PowerDownJpeg0), \ + __SMU_DUMMY_MAP(PowerUpJpeg1), \ + __SMU_DUMMY_MAP(PowerDownJpeg1), \ __SMU_DUMMY_MAP(BacoAudioD3PME), \ __SMU_DUMMY_MAP(ArmD3), \ __SMU_DUMMY_MAP(RunDcBtc), \ @@ -135,6 +139,8 @@ __SMU_DUMMY_MAP(PowerUpSdma), \ __SMU_DUMMY_MAP(SetHardMinIspclkByFreq), \ __SMU_DUMMY_MAP(SetHardMinVcn), \ + __SMU_DUMMY_MAP(SetHardMinVcn0), \ + __SMU_DUMMY_MAP(SetHardMinVcn1), \ __SMU_DUMMY_MAP(SetAllowFclkSwitch), \ __SMU_DUMMY_MAP(SetMinVideoGfxclkFreq), \ __SMU_DUMMY_MAP(ActiveProcessNotify), \ @@ -150,6 +156,8 @@ __SMU_DUMMY_MAP(SetPhyclkVoltageByFreq), \ __SMU_DUMMY_MAP(SetDppclkVoltageByFreq), \ __SMU_DUMMY_MAP(SetSoftMinVcn), \ + __SMU_DUMMY_MAP(SetSoftMinVcn0), \ + __SMU_DUMMY_MAP(SetSoftMinVcn1), \ __SMU_DUMMY_MAP(EnablePostCode), \ __SMU_DUMMY_MAP(GetGfxclkFrequency), \ __SMU_DUMMY_MAP(GetFclkFrequency), \ @@ -161,6 +169,8 @@ __SMU_DUMMY_MAP(SetSoftMaxSocclkByFreq), \ __SMU_DUMMY_MAP(SetSoftMaxFclkByFreq), \ __SMU_DUMMY_MAP(SetSoftMaxVcn), \ + __SMU_DUMMY_MAP(SetSoftMaxVcn0), \ + __SMU_DUMMY_MAP(SetSoftMaxVcn1), \ __SMU_DUMMY_MAP(PowerGateMmHub), \ __SMU_DUMMY_MAP(UpdatePmeRestore), \ __SMU_DUMMY_MAP(GpuChangeState), \ diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index 3f7463c1c1a9..4af1985ae446 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -27,6 +27,7 @@ #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7 +#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_1 0x6 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1 #define FEATURE_MASK(feature) (1ULL << feature) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 9c03296f92cd..67117ced7c6a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2751,7 +2751,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_set_mp1_state(smu, mp1_state); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_PrepareMp1ForUnload, + 0x55, NULL); + + if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) + ret = smu_v13_0_disable_pmfw_state(smu); + break; default: /* Ignore others */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index bb98156b2fa1..949131bd1ecb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -226,8 +226,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en) struct amdgpu_device *adev = smu->adev; int ret = 0; - if (!en && !adev->in_s0ix) + if (!en && !adev->in_s0ix) { + /* Adds a GFX reset as workaround just before sending the + * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering + * an invalid state. + */ + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, + SMU_RESET_MODE_2, NULL); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index b06a3cc43305..07a65e005785 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -234,7 +234,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0; break; case IP_VERSION(14, 0, 1): - smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0; + smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_1; break; default: @@ -1402,9 +1402,22 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu, if (adev->vcn.harvest_config & (1 << i)) continue; - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - i << 16U, NULL); + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) { + if (i == 0) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0, + i << 16U, NULL); + else if (i == 1) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1, + i << 16U, NULL); + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + i << 16U, NULL); + } + if (ret) return ret; } @@ -1415,9 +1428,34 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu, int smu_v14_0_set_jpeg_enable(struct smu_context *smu, bool enable) { - return smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg, - 0, NULL); + struct amdgpu_device *adev = smu->adev; + int i, ret = 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) { + if (i == 0) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg0 : SMU_MSG_PowerDownJpeg0, + i << 16U, NULL); + else if (i == 1 && amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg1 : SMU_MSG_PowerDownJpeg1, + i << 16U, NULL); + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg, + i << 16U, NULL); + } + + if (ret) + return ret; + } + + return ret; } int smu_v14_0_run_btc(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 9310c4758e38..63399c00cc28 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -70,9 +70,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1), MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), - MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1), - MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 1), - MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn, 1), + MSG_MAP(PowerDownVcn0, PPSMC_MSG_PowerDownVcn0, 1), + MSG_MAP(PowerUpVcn0, PPSMC_MSG_PowerUpVcn0, 1), + MSG_MAP(SetHardMinVcn0, PPSMC_MSG_SetHardMinVcn0, 1), + MSG_MAP(PowerDownVcn1, PPSMC_MSG_PowerDownVcn1, 1), + MSG_MAP(PowerUpVcn1, PPSMC_MSG_PowerUpVcn1, 1), + MSG_MAP(SetHardMinVcn1, PPSMC_MSG_SetHardMinVcn1, 1), MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxclk, 1), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 1), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), @@ -83,7 +86,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 1), MSG_MAP(SetHardMinSocclkByFreq, PPSMC_MSG_SetHardMinSocclkByFreq, 1), MSG_MAP(SetSoftMinFclk, PPSMC_MSG_SetSoftMinFclk, 1), - MSG_MAP(SetSoftMinVcn, PPSMC_MSG_SetSoftMinVcn, 1), + MSG_MAP(SetSoftMinVcn0, PPSMC_MSG_SetSoftMinVcn0, 1), + MSG_MAP(SetSoftMinVcn1, PPSMC_MSG_SetSoftMinVcn1, 1), MSG_MAP(EnableGfxImu, PPSMC_MSG_EnableGfxImu, 1), MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1), @@ -91,9 +95,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetHardMinGfxClk, PPSMC_MSG_SetHardMinGfxClk, 1), MSG_MAP(SetSoftMaxSocclkByFreq, PPSMC_MSG_SetSoftMaxSocclkByFreq, 1), MSG_MAP(SetSoftMaxFclkByFreq, PPSMC_MSG_SetSoftMaxFclkByFreq, 1), - MSG_MAP(SetSoftMaxVcn, PPSMC_MSG_SetSoftMaxVcn, 1), - MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1), - MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 1), + MSG_MAP(SetSoftMaxVcn0, PPSMC_MSG_SetSoftMaxVcn0, 1), + MSG_MAP(SetSoftMaxVcn1, PPSMC_MSG_SetSoftMaxVcn1, 1), + MSG_MAP(PowerDownJpeg0, PPSMC_MSG_PowerDownJpeg0, 1), + MSG_MAP(PowerUpJpeg0, PPSMC_MSG_PowerUpJpeg0, 1), + MSG_MAP(PowerDownJpeg1, PPSMC_MSG_PowerDownJpeg1, 1), + MSG_MAP(PowerUpJpeg1, PPSMC_MSG_PowerUpJpeg1, 1), MSG_MAP(SetHardMinFclkByFreq, PPSMC_MSG_SetHardMinFclkByFreq, 1), MSG_MAP(SetSoftMinSocclkByFreq, PPSMC_MSG_SetSoftMinSocclkByFreq, 1), MSG_MAP(PowerDownIspByTile, PPSMC_MSG_PowerDownIspByTile, 1), @@ -154,7 +161,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu) SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t), + SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); @@ -164,7 +171,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu) goto err0_out; smu_table->metrics_time = 0; - smu_table->clocks_table = kzalloc(sizeof(DpmClocks_t), GFP_KERNEL); + smu_table->clocks_table = kzalloc(max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)), GFP_KERNEL); if (!smu_table->clocks_table) goto err1_out; @@ -586,6 +593,60 @@ static int smu_v14_0_0_mode2_reset(struct smu_context *smu) return ret; } +static int smu_v14_0_1_get_dpm_freq_by_index(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t dpm_level, + uint32_t *freq) +{ + DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table; + + if (!clk_table || clk_type >= SMU_CLK_COUNT) + return -EINVAL; + + switch (clk_type) { + case SMU_SOCCLK: + if (dpm_level >= clk_table->NumSocClkLevelsEnabled) + return -EINVAL; + *freq = clk_table->SocClocks[dpm_level]; + break; + case SMU_VCLK: + if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled) + return -EINVAL; + *freq = clk_table->VClocks0[dpm_level]; + break; + case SMU_DCLK: + if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled) + return -EINVAL; + *freq = clk_table->DClocks0[dpm_level]; + break; + case SMU_VCLK1: + if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled) + return -EINVAL; + *freq = clk_table->VClocks1[dpm_level]; + break; + case SMU_DCLK1: + if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled) + return -EINVAL; + *freq = clk_table->DClocks1[dpm_level]; + break; + case SMU_UCLK: + case SMU_MCLK: + if (dpm_level >= clk_table->NumMemPstatesEnabled) + return -EINVAL; + *freq = clk_table->MemPstateTable[dpm_level].MemClk; + break; + case SMU_FCLK: + if (dpm_level >= clk_table->NumFclkLevelsEnabled) + return -EINVAL; + *freq = clk_table->FclkClocks_Freq[dpm_level]; + break; + default: + return -EINVAL; + } + + return 0; +} + static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t dpm_level, @@ -630,6 +691,19 @@ static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu, return 0; } +static int smu_v14_0_common_get_dpm_freq_by_index(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t dpm_level, + uint32_t *freq) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0)) + smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq); + else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + smu_v14_0_1_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq); + + return 0; +} + static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type) { @@ -650,6 +724,8 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu, break; case SMU_VCLK: case SMU_DCLK: + case SMU_VCLK1: + case SMU_DCLK1: feature_id = SMU_FEATURE_VCN_DPM_BIT; break; default: @@ -659,6 +735,126 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu, return smu_cmn_feature_is_enabled(smu, feature_id); } +static int smu_v14_0_1_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table; + uint32_t clock_limit; + uint32_t max_dpm_level, min_dpm_level; + int ret = 0; + + if (!smu_v14_0_0_clk_dpm_is_enabled(smu, clk_type)) { + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + clock_limit = smu->smu_table.boot_values.uclk; + break; + case SMU_FCLK: + clock_limit = smu->smu_table.boot_values.fclk; + break; + case SMU_GFXCLK: + case SMU_SCLK: + clock_limit = smu->smu_table.boot_values.gfxclk; + break; + case SMU_SOCCLK: + clock_limit = smu->smu_table.boot_values.socclk; + break; + case SMU_VCLK: + case SMU_VCLK1: + clock_limit = smu->smu_table.boot_values.vclk; + break; + case SMU_DCLK: + case SMU_DCLK1: + clock_limit = smu->smu_table.boot_values.dclk; + break; + default: + clock_limit = 0; + break; + } + + /* clock in Mhz unit */ + if (min) + *min = clock_limit / 100; + if (max) + *max = clock_limit / 100; + + return 0; + } + + if (max) { + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + *max = clk_table->MaxGfxClk; + break; + case SMU_MCLK: + case SMU_UCLK: + case SMU_FCLK: + max_dpm_level = 0; + break; + case SMU_SOCCLK: + max_dpm_level = clk_table->NumSocClkLevelsEnabled - 1; + break; + case SMU_VCLK: + case SMU_DCLK: + max_dpm_level = clk_table->Vcn0ClkLevelsEnabled - 1; + break; + case SMU_VCLK1: + case SMU_DCLK1: + max_dpm_level = clk_table->Vcn1ClkLevelsEnabled - 1; + break; + default: + ret = -EINVAL; + goto failed; + } + + if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) { + ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max); + if (ret) + goto failed; + } + } + + if (min) { + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + *min = clk_table->MinGfxClk; + break; + case SMU_MCLK: + case SMU_UCLK: + min_dpm_level = clk_table->NumMemPstatesEnabled - 1; + break; + case SMU_FCLK: + min_dpm_level = clk_table->NumFclkLevelsEnabled - 1; + break; + case SMU_SOCCLK: + min_dpm_level = 0; + break; + case SMU_VCLK: + case SMU_DCLK: + case SMU_VCLK1: + case SMU_DCLK1: + min_dpm_level = 0; + break; + default: + ret = -EINVAL; + goto failed; + } + + if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) { + ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min); + if (ret) + goto failed; + } + } + +failed: + return ret; +} + static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, @@ -729,7 +925,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu, } if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) { - ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max); + ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max); if (ret) goto failed; } @@ -761,7 +957,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu, } if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) { - ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min); + ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min); if (ret) goto failed; } @@ -771,6 +967,19 @@ failed: return ret; } +static int smu_v14_0_common_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0)) + smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max); + else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + smu_v14_0_1_get_dpm_ultimate_freq(smu, clk_type, min, max); + + return 0; +} + static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *value) @@ -804,6 +1013,37 @@ static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu, return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value); } +static int smu_v14_0_1_get_dpm_level_count(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *count) +{ + DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table; + + switch (clk_type) { + case SMU_SOCCLK: + *count = clk_table->NumSocClkLevelsEnabled; + break; + case SMU_VCLK: + case SMU_DCLK: + *count = clk_table->Vcn0ClkLevelsEnabled; + break; + case SMU_VCLK1: + case SMU_DCLK1: + *count = clk_table->Vcn1ClkLevelsEnabled; + break; + case SMU_MCLK: + *count = clk_table->NumMemPstatesEnabled; + break; + case SMU_FCLK: + *count = clk_table->NumFclkLevelsEnabled; + break; + default: + break; + } + + return 0; +} + static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *count) @@ -833,6 +1073,18 @@ static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu, return 0; } +static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *count) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0)) + smu_v14_0_0_get_dpm_level_count(smu, clk_type, count); + else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + smu_v14_0_1_get_dpm_level_count(smu, clk_type, count); + + return 0; +} + static int smu_v14_0_0_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { @@ -859,18 +1111,20 @@ static int smu_v14_0_0_print_clk_levels(struct smu_context *smu, case SMU_SOCCLK: case SMU_VCLK: case SMU_DCLK: + case SMU_VCLK1: + case SMU_DCLK1: case SMU_MCLK: case SMU_FCLK: ret = smu_v14_0_0_get_current_clk_freq(smu, clk_type, &cur_value); if (ret) break; - ret = smu_v14_0_0_get_dpm_level_count(smu, clk_type, &count); + ret = smu_v14_0_common_get_dpm_level_count(smu, clk_type, &count); if (ret) break; for (i = 0; i < count; i++) { - ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, i, &value); + ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, i, &value); if (ret) break; @@ -933,8 +1187,13 @@ static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu, break; case SMU_VCLK: case SMU_DCLK: - msg_set_min = SMU_MSG_SetHardMinVcn; - msg_set_max = SMU_MSG_SetSoftMaxVcn; + msg_set_min = SMU_MSG_SetHardMinVcn0; + msg_set_max = SMU_MSG_SetSoftMaxVcn0; + break; + case SMU_VCLK1: + case SMU_DCLK1: + msg_set_min = SMU_MSG_SetHardMinVcn1; + msg_set_max = SMU_MSG_SetSoftMaxVcn1; break; default: return -EINVAL; @@ -964,11 +1223,11 @@ static int smu_v14_0_0_force_clk_levels(struct smu_context *smu, case SMU_FCLK: case SMU_VCLK: case SMU_DCLK: - ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); + ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); if (ret) break; - ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq); + ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq); if (ret) break; @@ -993,25 +1252,25 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu, switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max); - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max); - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max); sclk_min = sclk_max; fclk_min = fclk_max; socclk_min = socclk_max; break; case AMD_DPM_FORCED_LEVEL_LOW: - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL); - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL); - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL); sclk_max = sclk_min; fclk_max = fclk_min; socclk_max = socclk_min; break; case AMD_DPM_FORCED_LEVEL_AUTO: - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max); - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max); - smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max); + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max); break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: @@ -1060,6 +1319,18 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu, return ret; } +static int smu_v14_0_1_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) +{ + DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table; + + smu->gfx_default_hard_min_freq = clk_table->MinGfxClk; + smu->gfx_default_soft_max_freq = clk_table->MaxGfxClk; + smu->gfx_actual_hard_min_freq = 0; + smu->gfx_actual_soft_max_freq = 0; + + return 0; +} + static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) { DpmClocks_t *clk_table = smu->smu_table.clocks_table; @@ -1072,6 +1343,16 @@ static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *sm return 0; } +static int smu_v14_0_common_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0)) + smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu); + else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + smu_v14_0_1_set_fine_grain_gfx_freq_parameters(smu); + + return 0; +} + static int smu_v14_0_0_set_vpe_enable(struct smu_context *smu, bool enable) { @@ -1088,6 +1369,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu, 0, NULL); } +static int smu_14_0_1_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table) +{ + DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table; + uint8_t idx; + + /* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */ + for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) { + clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0; + clock_table->SocClocks[idx].Vol = 0; + } + + for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) { + clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0; + clock_table->VPEClocks[idx].Vol = 0; + } + + return 0; +} + static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table) { DpmClocks_t *clk_table = smu->smu_table.clocks_table; @@ -1107,6 +1407,16 @@ static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks * return 0; } +static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0)) + smu_14_0_0_get_dpm_table(smu, clock_table); + else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + smu_14_0_1_get_dpm_table(smu, clock_table); + + return 0; +} + static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .check_fw_status = smu_v14_0_check_fw_status, .check_fw_version = smu_v14_0_check_fw_version, @@ -1128,16 +1438,16 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .set_driver_table_location = smu_v14_0_set_driver_table_location, .gfx_off_control = smu_v14_0_gfx_off_control, .mode2_reset = smu_v14_0_0_mode2_reset, - .get_dpm_ultimate_freq = smu_v14_0_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq, .od_edit_dpm_table = smu_v14_0_od_edit_dpm_table, .print_clk_levels = smu_v14_0_0_print_clk_levels, .force_clk_levels = smu_v14_0_0_force_clk_levels, .set_performance_level = smu_v14_0_0_set_performance_level, - .set_fine_grain_gfx_freq_parameters = smu_v14_0_0_set_fine_grain_gfx_freq_parameters, + .set_fine_grain_gfx_freq_parameters = smu_v14_0_common_set_fine_grain_gfx_freq_parameters, .set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu, .dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable, .dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable, - .get_dpm_clock_table = smu_14_0_0_get_dpm_table, + .get_dpm_clock_table = smu_v14_0_common_get_dpm_table, }; static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu) diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index ebb6d8ebd44e..1e9259416980 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -180,6 +180,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on) { struct ast_device *ast = to_ast_device(dev); u8 video_on_off = on; + u32 i = 0; // Video On/Off ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on); @@ -192,6 +193,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on) ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) { // wait 1 ms mdelay(1); + if (++i > 200) + break; } } } diff --git a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c index bd61e20770a5..14a2a8473682 100644 --- a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c @@ -52,7 +52,7 @@ * @adapter: I2C adapter for the DDC bus * @offset: register offset * @buffer: buffer for return data - * @size: sizo of the buffer + * @size: size of the buffer * * Reads @size bytes from the DP dual mode adaptor registers * starting at @offset. @@ -116,7 +116,7 @@ EXPORT_SYMBOL(drm_dp_dual_mode_read); * @adapter: I2C adapter for the DDC bus * @offset: register offset * @buffer: buffer for write data - * @size: sizo of the buffer + * @size: size of the buffer * * Writes @size bytes to the DP dual mode adaptor registers * starting at @offset. diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 266826eac4a7..f5d4be897866 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -4111,6 +4111,13 @@ int drm_dp_bw_overhead(int lane_count, int hactive, u32 overhead = 1000000; int symbol_cycles; + if (lane_count == 0 || hactive == 0 || bpp_x16 == 0) { + DRM_DEBUG_KMS("Invalid BW overhead params: lane_count %d, hactive %d, bpp_x16 %d.%04d\n", + lane_count, hactive, + bpp_x16 >> 4, (bpp_x16 & 0xf) * 625); + return 0; + } + /* * DP Standard v2.1 2.6.4.1 * SSC downspread and ref clock variation margin: diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 871e4e2129d6..0683a129b362 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -777,6 +777,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, unsigned int total_modes_count = 0; struct drm_client_offset *offsets; unsigned int connector_count = 0; + /* points to modes protected by mode_config.mutex */ struct drm_display_mode **modes; struct drm_crtc **crtcs; int i, ret = 0; @@ -845,7 +846,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, drm_client_pick_crtcs(client, connectors, connector_count, crtcs, modes, 0, width, height); } - mutex_unlock(&dev->mode_config.mutex); drm_client_modeset_release(client); @@ -875,6 +875,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, modeset->y = offset->y; } } + mutex_unlock(&dev->mode_config.mutex); mutex_unlock(&client->modeset_mutex); out: diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 7352bde299d5..03bd3c7bd0dc 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf, { struct drm_gem_object *obj = dma_buf->priv; - if (!obj->funcs->get_sg_table) + /* + * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers + * that implement their own ->map_dma_buf() do not. + */ + if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf && + !obj->funcs->get_sg_table) return -ENOSYS; return drm_gem_pin(obj); diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 3ef6ed41e62b..fba73c38e235 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -33,9 +33,9 @@ endif subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable -CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init) -CFLAGS_display/intel_display_device.o = $(call cc-disable-warning, override-init) -CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init) +CFLAGS_i915_pci.o = -Wno-override-init +CFLAGS_display/intel_display_device.o = -Wno-override-init +CFLAGS_display/intel_fbdev.o = -Wno-override-init # Support compiling the display code separately for both i915 and xe # drivers. Define I915 when building i915. @@ -118,6 +118,7 @@ gt-y += \ gt/intel_ggtt_fencing.o \ gt/intel_gt.o \ gt/intel_gt_buffer_pool.o \ + gt/intel_gt_ccs_mode.o \ gt/intel_gt_clock_utils.o \ gt/intel_gt_debugfs.o \ gt/intel_gt_engines_debugfs.o \ diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index dfe0b07a122d..06ec04e667e3 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -717,7 +717,6 @@ static void g4x_enable_dp(struct intel_atomic_state *state, { intel_enable_dp(state, encoder, pipe_config, conn_state); intel_edp_backlight_on(pipe_config, conn_state); - encoder->audio_enable(encoder, pipe_config, conn_state); } static void vlv_enable_dp(struct intel_atomic_state *state, @@ -726,7 +725,6 @@ static void vlv_enable_dp(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { intel_edp_backlight_on(pipe_config, conn_state); - encoder->audio_enable(encoder, pipe_config, conn_state); } static void g4x_pre_enable_dp(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index eda4a8b88590..ac456a2275db 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1155,7 +1155,6 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) } intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); /* ensure all panel commands dispatched before enabling transcoder */ wait_for_cmds_dispatched_to_panel(encoder); @@ -1256,6 +1255,8 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + /* step7: enable backlight */ intel_backlight_enable(crtc_state, conn_state); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index fe52c06271ef..52bd3576835b 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1955,16 +1955,12 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915, * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. */ -static void fixup_mipi_sequences(struct drm_i915_private *i915, - struct intel_panel *panel) +static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) { u8 *init_otp; int len; - /* Limit this to VLV for now. */ - if (!IS_VALLEYVIEW(i915)) - return; - /* Limit this to v1 vid-mode sequences */ if (panel->vbt.dsi.config->is_cmd_mode || panel->vbt.dsi.seq_version != 1) @@ -2000,6 +1996,41 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915, panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1; } +/* + * Some machines (eg. Lenovo 82TQ) appear to have broken + * VBT sequences: + * - INIT_OTP is not present at all + * - what should be in INIT_OTP is in DISPLAY_ON + * - what should be in DISPLAY_ON is in BACKLIGHT_ON + * (along with the actual backlight stuff) + * + * To make those work we simply swap DISPLAY_ON and INIT_OTP. + * + * TODO: Do we need to limit this to specific machines, + * or examine the contents of the sequences to + * avoid false positives? + */ +static void icl_fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) +{ + if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] && + panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) { + drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n"); + + swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP], + panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]); + } +} + +static void fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) +{ + if (DISPLAY_VER(i915) >= 11) + icl_fixup_mipi_sequences(i915, panel); + else if (IS_VALLEYVIEW(i915)) + vlv_fixup_mipi_sequences(i915, panel); +} + static void parse_mipi_sequence(struct drm_i915_private *i915, struct intel_panel *panel) @@ -3351,6 +3382,9 @@ bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_encoder_da { const struct child_device_config *child = &devdata->child; + if (!devdata) + return false; + if (!intel_bios_encoder_supports_dp(devdata) || !intel_bios_encoder_supports_hdmi(devdata)) return false; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index ed89b86ea625..f672bfd70d45 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2534,7 +2534,8 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state) intel_atomic_get_old_cdclk_state(state); const struct intel_cdclk_state *new_cdclk_state = intel_atomic_get_new_cdclk_state(state); - enum pipe pipe = new_cdclk_state->pipe; + struct intel_cdclk_config cdclk_config; + enum pipe pipe; if (!intel_cdclk_changed(&old_cdclk_state->actual, &new_cdclk_state->actual)) @@ -2543,12 +2544,25 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state) if (IS_DG2(i915)) intel_cdclk_pcode_pre_notify(state); - if (pipe == INVALID_PIPE || - old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) { - drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed); + if (new_cdclk_state->disable_pipes) { + cdclk_config = new_cdclk_state->actual; + pipe = INVALID_PIPE; + } else { + if (new_cdclk_state->actual.cdclk >= old_cdclk_state->actual.cdclk) { + cdclk_config = new_cdclk_state->actual; + pipe = new_cdclk_state->pipe; + } else { + cdclk_config = old_cdclk_state->actual; + pipe = INVALID_PIPE; + } - intel_set_cdclk(i915, &new_cdclk_state->actual, pipe); + cdclk_config.voltage_level = max(new_cdclk_state->actual.voltage_level, + old_cdclk_state->actual.voltage_level); } + + drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed); + + intel_set_cdclk(i915, &cdclk_config, pipe); } /** @@ -2566,7 +2580,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state) intel_atomic_get_old_cdclk_state(state); const struct intel_cdclk_state *new_cdclk_state = intel_atomic_get_new_cdclk_state(state); - enum pipe pipe = new_cdclk_state->pipe; + enum pipe pipe; if (!intel_cdclk_changed(&old_cdclk_state->actual, &new_cdclk_state->actual)) @@ -2575,12 +2589,15 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state) if (IS_DG2(i915)) intel_cdclk_pcode_post_notify(state); - if (pipe != INVALID_PIPE && - old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) { - drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed); + if (!new_cdclk_state->disable_pipes && + new_cdclk_state->actual.cdclk < old_cdclk_state->actual.cdclk) + pipe = new_cdclk_state->pipe; + else + pipe = INVALID_PIPE; - intel_set_cdclk(i915, &new_cdclk_state->actual, pipe); - } + drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed); + + intel_set_cdclk(i915, &new_cdclk_state->actual, pipe); } static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) @@ -3058,6 +3075,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa return NULL; cdclk_state->pipe = INVALID_PIPE; + cdclk_state->disable_pipes = false; return &cdclk_state->base; } @@ -3236,6 +3254,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) if (ret) return ret; + new_cdclk_state->disable_pipes = true; + drm_dbg_kms(&dev_priv->drm, "Modeset required for cdclk change\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 48fd7d39e0cd..71bc032bfef1 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -51,6 +51,9 @@ struct intel_cdclk_state { /* bitmask of active pipes */ u8 active_pipes; + + /* update cdclk with pipes disabled */ + bool disable_pipes; }; int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index f8b33999d43f..0d3da55e1c24 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -36,12 +36,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); - const struct drm_framebuffer *fb = plane_state->hw.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 base; if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) - base = i915_gem_object_get_dma_address(obj, 0); + base = plane_state->phys_dma_addr; else base = intel_plane_ggtt_offset(plane_state); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index c587a8efeafc..c17462b4c2ac 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4256,7 +4256,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1, static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1, const struct intel_crtc_state *crtc_state2) { + /* + * FIXME the modeset sequence is currently wrong and + * can't deal with bigjoiner + port sync at the same time. + */ return crtc_state1->hw.active && crtc_state2->hw.active && + !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes && crtc_state1->output_types == crtc_state2->output_types && crtc_state1->output_format == crtc_state2->output_format && crtc_state1->lane_count == crtc_state2->lane_count && diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ab2f52d21bad..8af9e6128277 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2709,15 +2709,6 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state) */ intel_de_write(dev_priv, PIPESRC(pipe), PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1)); - - if (!crtc_state->enable_psr2_su_region_et) - return; - - width = drm_rect_width(&crtc_state->psr2_su_area); - height = drm_rect_height(&crtc_state->psr2_su_area); - - intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(pipe), - PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1)); } static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index fe4268813786..9b1bce2624b9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -47,6 +47,7 @@ struct drm_printer; #define HAS_DPT(i915) (DISPLAY_VER(i915) >= 13) #define HAS_DSB(i915) (DISPLAY_INFO(i915)->has_dsb) #define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc) +#define HAS_DSC_MST(__i915) (DISPLAY_VER(__i915) >= 12 && HAS_DSC(__i915)) #define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0) #define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg) #define HAS_FW_BLC(i915) (DISPLAY_VER(i915) >= 3) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index e67cd5b02e84..bf3f942e19c3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -727,6 +727,7 @@ struct intel_plane_state { #define PLANE_HAS_FENCE BIT(0) struct intel_fb_view view; + u32 phys_dma_addr; /* for cursor_needs_physical */ /* Plane pxp decryption state */ bool decrypt; @@ -1422,6 +1423,8 @@ struct intel_crtc_state { u32 psr2_man_track_ctl; + u32 pipe_srcsz_early_tpt; + struct drm_rect psr2_su_area; /* Variable Refresh Rate state */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f0c3ed37b350..e583515f9b25 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -67,6 +67,7 @@ #include "intel_dp_tunnel.h" #include "intel_dpio_phy.h" #include "intel_dpll.h" +#include "intel_drrs.h" #include "intel_fifo_underrun.h" #include "intel_hdcp.h" #include "intel_hdmi.h" @@ -498,7 +499,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) /* The values must be in increasing order */ static const int mtl_rates[] = { 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, - 810000, 1000000, 1350000, 2000000, + 810000, 1000000, 2000000, }; static const int icl_rates[] = { 162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000, @@ -1421,7 +1422,8 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp, if (DISPLAY_VER(dev_priv) >= 12) return true; - if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A) + if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A && + !intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) return true; return false; @@ -1916,8 +1918,9 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) { - if (valid_dsc_bpp[i] < dsc_min_bpp || - valid_dsc_bpp[i] > dsc_max_bpp) + if (valid_dsc_bpp[i] < dsc_min_bpp) + continue; + if (valid_dsc_bpp[i] > dsc_max_bpp) break; ret = dsc_compute_link_config(intel_dp, @@ -2683,15 +2686,6 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA); } -static bool cpu_transcoder_has_drrs(struct drm_i915_private *i915, - enum transcoder cpu_transcoder) -{ - if (HAS_DOUBLE_BUFFERED_M_N(i915)) - return true; - - return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder); -} - static bool can_enable_drrs(struct intel_connector *connector, const struct intel_crtc_state *pipe_config, const struct drm_display_mode *downclock_mode) @@ -2714,7 +2708,7 @@ static bool can_enable_drrs(struct intel_connector *connector, if (pipe_config->has_pch_encoder) return false; - if (!cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder)) + if (!intel_cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder)) return false; return downclock_mode && @@ -2731,7 +2725,11 @@ intel_dp_drrs_compute_config(struct intel_connector *connector, intel_panel_downclock_mode(connector, &pipe_config->hw.adjusted_mode); int pixel_clock; - if (has_seamless_m_n(connector)) + /* + * FIXME all joined pipes share the same transcoder. + * Need to account for that when updating M/N live. + */ + if (has_seamless_m_n(connector) && !pipe_config->bigjoiner_pipes) pipe_config->update_m_n = true; if (!can_enable_drrs(connector, pipe_config, downclock_mode)) { @@ -6565,6 +6563,7 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, intel_connector->get_hw_state = intel_ddi_connector_get_hw_state; else intel_connector->get_hw_state = intel_connector_get_hw_state; + intel_connector->sync_state = intel_dp_connector_sync_state; if (!intel_edp_init_connector(intel_dp, intel_connector)) { intel_dp_aux_fini(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index b98a87883fef..9db43bd81ce2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -691,12 +691,15 @@ int intel_dp_hdcp_get_remote_capability(struct intel_connector *connector, u8 bcaps; int ret; + *hdcp_capable = false; + *hdcp2_capable = false; if (!intel_encoder_is_mst(connector->encoder)) return -EINVAL; ret = _intel_dp_hdcp2_get_capability(aux, hdcp2_capable); if (ret) - return ret; + drm_dbg_kms(&i915->drm, + "HDCP2 DPCD capability read failed err: %d\n", ret); ret = intel_dp_hdcp_read_bcaps(aux, i915, &bcaps); if (ret) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 53aec023ce92..b651c990af85 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1355,7 +1355,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, return 0; } - if (DISPLAY_VER(dev_priv) >= 10 && + if (HAS_DSC_MST(dev_priv) && drm_dp_sink_supports_dsc(intel_connector->dp.dsc_dpcd)) { /* * TBD pass the connector BPC, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index ff480f171f75..b6d24410740f 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2554,7 +2554,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params, static bool ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { - return (((IS_ELKHARTLAKE(i915) || IS_JASPERLAKE(i915)) && + return ((IS_ELKHARTLAKE(i915) && IS_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) && i915->display.dpll.ref_clks.nssc == 38400; diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index 169ef38ff188..597f8bd6aa1a 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -63,6 +63,15 @@ const char *intel_drrs_type_str(enum drrs_type drrs_type) return str[drrs_type]; } +bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915, + enum transcoder cpu_transcoder) +{ + if (HAS_DOUBLE_BUFFERED_M_N(i915)) + return true; + + return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder); +} + static void intel_drrs_set_refresh_rate_pipeconf(struct intel_crtc *crtc, enum drrs_refresh_rate refresh_rate) @@ -312,9 +321,8 @@ static int intel_drrs_debugfs_status_show(struct seq_file *m, void *unused) mutex_lock(&crtc->drrs.mutex); seq_printf(m, "DRRS capable: %s\n", - str_yes_no(crtc_state->has_drrs || - HAS_DOUBLE_BUFFERED_M_N(i915) || - intel_cpu_transcoder_has_m2_n2(i915, crtc_state->cpu_transcoder))); + str_yes_no(intel_cpu_transcoder_has_drrs(i915, + crtc_state->cpu_transcoder))); seq_printf(m, "DRRS enabled: %s\n", str_yes_no(crtc_state->has_drrs)); diff --git a/drivers/gpu/drm/i915/display/intel_drrs.h b/drivers/gpu/drm/i915/display/intel_drrs.h index 8ef5f93a80ff..0982f95eab72 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.h +++ b/drivers/gpu/drm/i915/display/intel_drrs.h @@ -9,12 +9,15 @@ #include enum drrs_type; +enum transcoder; struct drm_i915_private; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; struct intel_connector; +bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915, + enum transcoder cpu_transcoder); const char *intel_drrs_type_str(enum drrs_type drrs_type); bool intel_drrs_is_active(struct intel_crtc *crtc); void intel_drrs_activate(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index d62e050185e7..e4515bf92038 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -340,6 +340,17 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency)); } +static u32 dsb_chicken(struct intel_crtc *crtc) +{ + if (crtc->mode_flags & I915_MODE_FLAG_VRR) + return DSB_CTRL_WAIT_SAFE_WINDOW | + DSB_CTRL_NO_WAIT_VBLANK | + DSB_INST_WAIT_SAFE_WINDOW | + DSB_INST_NO_WAIT_VBLANK; + else + return 0; +} + static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, int dewake_scanline) { @@ -361,6 +372,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id), ctrl | DSB_ENABLE); + intel_de_write_fw(dev_priv, DSB_CHICKEN(pipe, dsb->id), + dsb_chicken(crtc)); + intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id), intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf)); diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 7b42aef37d2f..b6df9baf481b 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -255,6 +255,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) return PTR_ERR(vma); plane_state->ggtt_vma = vma; + + /* + * Pre-populate the dma address before we enter the vblank + * evade critical section as i915_gem_object_get_dma_address() + * will trigger might_sleep() even if it won't actually sleep, + * which is the case when the fb has already been pinned. + */ + if (phys_cursor) + plane_state->phys_dma_addr = + i915_gem_object_get_dma_address(intel_fb_obj(fb), 0); } else { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 6927785fd6ff..aabd018bd737 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1422,6 +1422,17 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; } + /* + * FIXME figure out what is wrong with PSR+bigjoiner and + * fix it. Presumably something related to the fact that + * PSR is a transcoder level feature. + */ + if (crtc_state->bigjoiner_pipes) { + drm_dbg_kms(&dev_priv->drm, + "PSR disabled due to bigjoiner\n"); + return; + } + if (CAN_PANEL_REPLAY(intel_dp)) crtc_state->has_panel_replay = true; else @@ -1994,6 +2005,7 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp) void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; struct intel_encoder *encoder; @@ -2013,6 +2025,12 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(cpu_transcoder), crtc_state->psr2_man_track_ctl); + + if (!crtc_state->enable_psr2_su_region_et) + return; + + intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(crtc->pipe), + crtc_state->pipe_srcsz_early_tpt); } static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, @@ -2051,6 +2069,20 @@ exit: crtc_state->psr2_man_track_ctl = val; } +static u32 psr2_pipe_srcsz_early_tpt_calc(struct intel_crtc_state *crtc_state, + bool full_update) +{ + int width, height; + + if (!crtc_state->enable_psr2_su_region_et || full_update) + return 0; + + width = drm_rect_width(&crtc_state->psr2_su_area); + height = drm_rect_height(&crtc_state->psr2_su_area); + + return PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1); +} + static void clip_area_update(struct drm_rect *overlap_damage_area, struct drm_rect *damage_area, struct drm_rect *pipe_src) @@ -2095,21 +2127,36 @@ static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_st * cursor fully when cursor is in SU area. */ static void -intel_psr2_sel_fetch_et_alignment(struct intel_crtc_state *crtc_state, - struct intel_plane_state *cursor_state) +intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_rect inter; + struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + struct intel_plane_state *new_plane_state; + struct intel_plane *plane; + int i; - if (!crtc_state->enable_psr2_su_region_et || - !cursor_state->uapi.visible) + if (!crtc_state->enable_psr2_su_region_et) return; - inter = crtc_state->psr2_su_area; - if (!drm_rect_intersect(&inter, &cursor_state->uapi.dst)) - return; + for_each_new_intel_plane_in_state(state, plane, new_plane_state, i) { + struct drm_rect inter; - clip_area_update(&crtc_state->psr2_su_area, &cursor_state->uapi.dst, - &crtc_state->pipe_src); + if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc) + continue; + + if (plane->id != PLANE_CURSOR) + continue; + + if (!new_plane_state->uapi.visible) + continue; + + inter = crtc_state->psr2_su_area; + if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) + continue; + + clip_area_update(&crtc_state->psr2_su_area, &new_plane_state->uapi.dst, + &crtc_state->pipe_src); + } } /* @@ -2152,8 +2199,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - struct intel_plane_state *new_plane_state, *old_plane_state, - *cursor_plane_state = NULL; + struct intel_plane_state *new_plane_state, *old_plane_state; struct intel_plane *plane; bool full_update = false; int i, ret; @@ -2238,13 +2284,6 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, damaged_area.x2 += new_plane_state->uapi.dst.x1 - src.x1; clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &crtc_state->pipe_src); - - /* - * Cursor plane new state is stored to adjust su area to cover - * cursor are fully. - */ - if (plane->id == PLANE_CURSOR) - cursor_plane_state = new_plane_state; } /* @@ -2273,9 +2312,13 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, if (ret) return ret; - /* Adjust su area to cover cursor fully as necessary */ - if (cursor_plane_state) - intel_psr2_sel_fetch_et_alignment(crtc_state, cursor_plane_state); + /* + * Adjust su area to cover cursor fully as necessary (early + * transport). This needs to be done after + * drm_atomic_add_affected_planes to ensure visible cursor is added into + * affected planes even when cursor is not updated by itself. + */ + intel_psr2_sel_fetch_et_alignment(state, crtc); intel_psr2_sel_fetch_pipe_alignment(crtc_state); @@ -2338,6 +2381,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, skip_sel_fetch_set_loop: psr2_man_trk_ctl_calc(crtc_state, full_update); + crtc_state->pipe_srcsz_early_tpt = + psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 5f9e748adc89..0cd9c183f621 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1842,8 +1842,6 @@ static void intel_disable_sdvo(struct intel_atomic_state *state, struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); u32 temp; - encoder->audio_disable(encoder, old_crtc_state, conn_state); - intel_sdvo_set_active_outputs(intel_sdvo, 0); if (0) intel_sdvo_set_encoder_power_state(intel_sdvo, @@ -1935,8 +1933,6 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, intel_sdvo_set_encoder_power_state(intel_sdvo, DRM_MODE_DPMS_ON); intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo_connector->output_flag); - - encoder->audio_enable(encoder, pipe_config, conn_state); } static enum drm_mode_status diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 5d905f932cb4..f542ee1db1d9 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -117,6 +117,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, const struct drm_display_info *info = &connector->base.display_info; int vmin, vmax; + /* + * FIXME all joined pipes share the same transcoder. + * Need to account for that during VRR toggle/push/etc. + */ + if (crtc_state->bigjoiner_pipes) + return; + if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) return; @@ -187,10 +194,11 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; /* - * TRANS_SET_CONTEXT_LATENCY with VRR enabled - * requires this chicken bit on ADL/DG2. + * This bit seems to have two meanings depending on the platform: + * TGL: generate VRR "safe window" for DSB vblank waits + * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR */ - if (DISPLAY_VER(dev_priv) == 13) + if (IS_DISPLAY_VER(dev_priv, 12, 13)) intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0, PIPE_VBLANK_WITH_DELAY); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index e941e2e4fd14..860574d04f88 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -2295,6 +2295,9 @@ static u8 skl_get_plane_caps(struct drm_i915_private *i915, if (HAS_4TILE(i915)) caps |= INTEL_PLANE_CAP_TILING_4; + if (!IS_ENABLED(I915) && !HAS_FLAT_CCS(i915)) + return caps; + if (skl_plane_has_rc_ccs(i915, pipe, plane_id)) { caps |= INTEL_PLANE_CAP_CCS_RC; if (DISPLAY_VER(i915) >= 12) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index fa46d2308b0e..81bf2216371b 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -961,6 +961,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm) struct i915_vma *vma; int ret; + if (!intel_gt_needs_wa_16018031267(vm->gt)) + return 0; + /* The memory will be used only by GPU. */ obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, I915_BO_ALLOC_VOLATILE | diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1ade568ffbfa..7a6dc371c384 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -908,6 +908,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) info->engine_mask &= ~BIT(GSC0); } + /* + * Do not create the command streamer for CCS slices beyond the first. + * All the workload submitted to the first engine will be shared among + * all the slices. + * + * Once the user will be allowed to customize the CCS mode, then this + * check needs to be removed. + */ + if (IS_DG2(gt->i915)) { + u8 first_ccs = __ffs(CCS_MASK(gt)); + + /* Mask off all the CCS engine */ + info->engine_mask &= ~GENMASK(CCS3, CCS0); + /* Put back in the first CCS engine */ + info->engine_mask |= BIT(_CCS(first_ccs)); + } + return info->engine_mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 96bdb93a948d..fb7bff27b45a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -279,9 +279,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 42aade0faf2d..b061a0a0d6b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3272,6 +3272,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. */ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a425db5ed3a2..6a2c2718bcc3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, return I915_MAP_WC; } +bool intel_gt_needs_wa_16018031267(struct intel_gt *gt) +{ + /* Wa_16018031267, Wa_16018063123 */ + return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71)); +} + bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) { return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 608f5c872928..003eb93b826f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -82,17 +82,18 @@ struct drm_printer; ##__VA_ARGS__); \ } while (0) -#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ - IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \ - engine->class == COPY_ENGINE_CLASS && engine->instance == 0) - static inline bool gt_is_root(struct intel_gt *gt) { return !gt->info.id; } +bool intel_gt_needs_wa_16018031267(struct intel_gt *gt); bool intel_gt_needs_wa_22016122933(struct intel_gt *gt); +#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ + intel_gt_needs_wa_16018031267(engine->gt) && \ + engine->class == COPY_ENGINE_CLASS && engine->instance == 0) + static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { return container_of(uc, struct intel_gt, uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c new file mode 100644 index 000000000000..044219c5960a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_ccs_mode.h" +#include "intel_gt_regs.h" + +void intel_gt_apply_ccs_mode(struct intel_gt *gt) +{ + int cslice; + u32 mode = 0; + int first_ccs = __ffs(CCS_MASK(gt)); + + if (!IS_DG2(gt->i915)) + return; + + /* Build the value for the fixed CCS load balancing */ + for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { + if (CCS_MASK(gt) & BIT(cslice)) + /* + * If available, assign the cslice + * to the first available engine... + */ + mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs); + + else + /* + * ... otherwise, mark the cslice as + * unavailable if no CCS dispatches here + */ + mode |= XEHP_CCS_MODE_CSLICE(cslice, + XEHP_CCS_MODE_CSLICE_MASK); + } + + intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h new file mode 100644 index 000000000000..9e5549caeb26 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __INTEL_GT_CCS_MODE_H__ +#define __INTEL_GT_CCS_MODE_H__ + +struct intel_gt; + +void intel_gt_apply_ccs_mode(struct intel_gt *gt); + +#endif /* __INTEL_GT_CCS_MODE_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 50962cfd1353..743fe3566722 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1477,8 +1477,14 @@ #define ECOBITS_PPGTT_CACHE4B (0 << 8) #define GEN12_RCU_MODE _MMIO(0x14800) +#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) +#define XEHP_CCS_MODE _MMIO(0x14804) +#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */ +#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1) +#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH)) + #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS1 (1 << 11) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d67d44611c28..6ec3582c9735 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -10,6 +10,7 @@ #include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_gt.h" +#include "intel_gt_ccs_mode.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" @@ -51,7 +52,8 @@ * registers belonging to BCS, VCS or VECS should be implemented in * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific * engine's MMIO range but that are part of of the common RCS/CCS reset domain - * should be implemented in general_render_compute_wa_init(). + * should be implemented in general_render_compute_wa_init(). The settings + * about the CCS load balancing should be added in ccs_engine_wa_mode(). * * - GT workarounds: the list of these WAs is applied whenever these registers * revert to their default values: on GPU reset, suspend/resume [1]_, etc. @@ -1653,6 +1655,7 @@ static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* Wa_14018575942 / Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ @@ -2853,6 +2856,28 @@ add_render_compute_tuning_settings(struct intel_gt *gt, wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC); } +static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + struct intel_gt *gt = engine->gt; + + if (!IS_DG2(gt->i915)) + return; + + /* + * Wa_14019159160: This workaround, along with others, leads to + * significant challenges in utilizing load balancing among the + * CCS slices. Consequently, an architectural decision has been + * made to completely disable automatic CCS load balancing. + */ + wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE); + + /* + * After having disabled automatic load balancing we need to + * assign all slices to a single CCS. We will call it CCS mode 1 + */ + intel_gt_apply_ccs_mode(gt); +} + /* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a @@ -3003,8 +3028,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal * to a single RCS/CCS engine's workaround list since * they're reset as part of the general render domain reset. */ - if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) + if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) { general_render_compute_wa_init(engine, wal); + ccs_engine_wa_mode(engine, wal); + } if (engine->class == COMPUTE_CLASS) ccs_engine_wa_init(engine, wal); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f3dcae4b9d45..0f83c6d4376f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1403,14 +1403,17 @@ static void guc_cancel_busyness_worker(struct intel_guc *guc) * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through * every possible call stack is unfeasible. It would be too intrusive to many * areas that really don't care about the GuC backend. However, there is the - * 'reset_in_progress' flag available, so just use that. + * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked. + * So just use those. Note that testing both is required due to the hideously + * complex nature of the i915 driver's reset code paths. * * And note that in the case of a reset occurring during driver unload - * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set - * is fine because there is another cancel in _finish (when the reset flag is - * not). + * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the + * reset flag/mutex are set) is fine because there is another explicit cancel in + * intel_guc_submission_fini (when the reset flag/mutex are not). */ - if (guc_to_gt(guc)->uc.reset_in_progress) + if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) || + test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags)) cancel_delayed_work(&guc->timestamp.work); else cancel_delayed_work_sync(&guc->timestamp.work); @@ -1424,8 +1427,6 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) unsigned long flags; ktime_t unused; - guc_cancel_busyness_worker(guc); - spin_lock_irqsave(&guc->timestamp.lock, flags); guc_update_pm_timestamp(guc, &unused); @@ -2004,13 +2005,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) void intel_guc_submission_reset_finish(struct intel_guc *guc) { - /* - * Ensure the busyness worker gets cancelled even on a fatal wedge. - * Note that reset_prepare is not allowed to because it confuses lockdep. - */ - if (guc_submission_initialized(guc)) - guc_cancel_busyness_worker(guc); - /* Reset called during driver load or during wedge? */ if (unlikely(!guc_submission_initialized(guc) || !intel_guc_is_fw_running(guc) || @@ -2136,6 +2130,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) if (!guc->submission_initialized) return; + guc_fini_engine_stats(guc); guc_flush_destroyed_contexts(guc); guc_lrc_desc_pool_destroy_v69(guc); i915_sched_engine_put(guc->sched_engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6dfe5d9456c6..399bc319180b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -637,6 +637,10 @@ void intel_uc_reset_finish(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; + /* + * NB: The wedge code path results in prepare -> prepare -> finish -> finish. + * So this function is sometimes called with the in-progress flag not set. + */ uc->reset_in_progress = false; /* Firmware expected to be running when this function is called */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 9ee902d5b72c..4b9233c07a22 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -800,7 +800,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_cleanup_modeset2; ret = intel_pxp_init(i915); - if (ret != -ENODEV) + if (ret && ret != -ENODEV) drm_dbg(&i915->drm, "pxp init failed with %d\n", ret); ret = intel_display_driver_probe(i915); diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 8c3f443c8347..b758fd110c20 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -72,12 +72,13 @@ hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat, struct intel_uncore *uncore = ddat->uncore; intel_wakeref_t wakeref; - mutex_lock(&hwmon->hwmon_lock); + with_intel_runtime_pm(uncore->rpm, wakeref) { + mutex_lock(&hwmon->hwmon_lock); - with_intel_runtime_pm(uncore->rpm, wakeref) intel_uncore_rmw(uncore, reg, clear, set); - mutex_unlock(&hwmon->hwmon_lock); + mutex_unlock(&hwmon->hwmon_lock); + } } /* @@ -136,20 +137,21 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy) else rgaddr = hwmon->rg.energy_status_all; - mutex_lock(&hwmon->hwmon_lock); + with_intel_runtime_pm(uncore->rpm, wakeref) { + mutex_lock(&hwmon->hwmon_lock); - with_intel_runtime_pm(uncore->rpm, wakeref) reg_val = intel_uncore_read(uncore, rgaddr); - if (reg_val >= ei->reg_val_prev) - ei->accum_energy += reg_val - ei->reg_val_prev; - else - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; - ei->reg_val_prev = reg_val; + if (reg_val >= ei->reg_val_prev) + ei->accum_energy += reg_val - ei->reg_val_prev; + else + ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + ei->reg_val_prev = reg_val; - *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, - hwmon->scl_shift_energy); - mutex_unlock(&hwmon->hwmon_lock); + *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, + hwmon->scl_shift_energy); + mutex_unlock(&hwmon->hwmon_lock); + } } static ssize_t @@ -404,6 +406,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) /* Block waiting for GuC reset to complete when needed */ for (;;) { + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); mutex_lock(&hwmon->hwmon_lock); prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE); @@ -417,14 +420,13 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) } mutex_unlock(&hwmon->hwmon_lock); + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); schedule(); } finish_wait(&ddat->waitq, &wait); if (ret) - goto unlock; - - wakeref = intel_runtime_pm_get(ddat->uncore->rpm); + goto exit; /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ if (val == PL1_DISABLE) { @@ -444,9 +446,8 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); exit: - intel_runtime_pm_put(ddat->uncore->rpm, wakeref); -unlock: mutex_unlock(&hwmon->hwmon_lock); + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index ba82277254b7..cc41974cee74 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include "i915_memcpy.h" diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e00557e1a57f..3b2e49ce29ba 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4599,7 +4599,7 @@ #define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \ _MTL_CHICKEN_TRANS_A, \ _MTL_CHICKEN_TRANS_B) -#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* ADL/DG2 */ +#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */ #define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */ #define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27) #define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d09aad34ba37..b70715b1411d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -34,6 +34,7 @@ #include "gt/intel_engine.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" #include "gt/intel_tlb.h" @@ -103,12 +104,42 @@ static inline struct i915_vma *active_to_vma(struct i915_active *ref) static int __i915_vma_active(struct i915_active *ref) { - return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_tryget(vma)) + return -ENOENT; + + /* + * Exclude global GTT VMA from holding a GT wakeref + * while active, otherwise GPU never goes idle. + */ + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we and our _retire() counterpart can be + * called asynchronously, storing a wakeref tracking + * handle inside struct i915_vma is not safe, and + * there is no other good place for that. Hence, + * use untracked variants of intel_gt_pm_get/put(). + */ + intel_gt_pm_get_untracked(vma->vm->gt); + } + + return 0; } static void __i915_vma_retire(struct i915_active *ref) { - i915_vma_put(active_to_vma(ref)); + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we can be called from atomic contexts, + * use an async variant of intel_gt_pm_put(). + */ + intel_gt_pm_put_async_untracked(vma->vm->gt); + } + + i915_vma_put(vma); } static struct i915_vma * @@ -1404,7 +1435,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, struct i915_vma_work *work = NULL; struct dma_fence *moving = NULL; struct i915_vma_resource *vma_res = NULL; - intel_wakeref_t wakeref = 0; + intel_wakeref_t wakeref; unsigned int bound; int err; @@ -1424,8 +1455,14 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err; - if (flags & PIN_GLOBAL) - wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + /* + * In case of a global GTT, we must hold a runtime-pm wakeref + * while global PTEs are updated. In other cases, we hold + * the rpm reference while the VMA is active. Since runtime + * resume may require allocations, which are forbidden inside + * vm->mutex, get the first rpm wakeref outside of the mutex. + */ + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); if (flags & vma->vm->bind_async_flags) { /* lock VM */ @@ -1561,8 +1598,7 @@ err_fence: if (work) dma_fence_work_commit_imm(&work->base); err_rpm: - if (wakeref) - intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); + intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); if (moving) dma_fence_put(moving); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 0674aca0f8a3..cf0b1de1c071 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1377,6 +1377,10 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) if (adreno_is_a618(gpu)) gpu->ubwc_config.highest_bank_bit = 14; + if (adreno_is_a619(gpu)) + /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */ + gpu->ubwc_config.highest_bank_bit = 13; + if (adreno_is_a619_holi(gpu)) gpu->ubwc_config.highest_bank_bit = 13; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 1f5245fc2cdc..a847a0f7a73c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -852,7 +852,7 @@ static void a6xx_get_shader_block(struct msm_gpu *gpu, (block->type << 8) | i); in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, - block->size, dumper->iova + A6XX_CD_DATA_OFFSET); + block->size, out); out += block->size * sizeof(u32); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h index 9a9f7092c526..a3e60ac70689 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h @@ -324,6 +324,7 @@ static const struct dpu_wb_cfg x1e80100_wb[] = { }, }; +/* TODO: INTF 3, 8 and 7 are used for MST, marked as INTF_NONE for now */ static const struct dpu_intf_cfg x1e80100_intf[] = { { .name = "intf_0", .id = INTF_0, @@ -358,8 +359,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = { .name = "intf_3", .id = INTF_3, .base = 0x37000, .len = 0x280, .features = INTF_SC7280_MASK, - .type = INTF_DP, - .controller_id = MSM_DP_CONTROLLER_1, + .type = INTF_NONE, + .controller_id = MSM_DP_CONTROLLER_0, /* pair with intf_0 for DP MST */ .prog_fetch_lines_worst_case = 24, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), @@ -368,7 +369,7 @@ static const struct dpu_intf_cfg x1e80100_intf[] = { .base = 0x38000, .len = 0x280, .features = INTF_SC7280_MASK, .type = INTF_DP, - .controller_id = MSM_DP_CONTROLLER_2, + .controller_id = MSM_DP_CONTROLLER_1, .prog_fetch_lines_worst_case = 24, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 20), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 21), @@ -381,6 +382,33 @@ static const struct dpu_intf_cfg x1e80100_intf[] = { .prog_fetch_lines_worst_case = 24, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 22), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 23), + }, { + .name = "intf_6", .id = INTF_6, + .base = 0x3A000, .len = 0x280, + .features = INTF_SC7280_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_2, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + }, { + .name = "intf_7", .id = INTF_7, + .base = 0x3b000, .len = 0x280, + .features = INTF_SC7280_MASK, + .type = INTF_NONE, + .controller_id = MSM_DP_CONTROLLER_2, /* pair with intf_6 for DP MST */ + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 18), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 19), + }, { + .name = "intf_8", .id = INTF_8, + .base = 0x3c000, .len = 0x280, + .features = INTF_SC7280_MASK, + .type = INTF_NONE, + .controller_id = MSM_DP_CONTROLLER_1, /* pair with intf_4 for DP MST */ + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13), }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index ef871239adb2..68fae048a9a8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -459,15 +459,15 @@ int dpu_core_perf_debugfs_init(struct dpu_kms *dpu_kms, struct dentry *parent) &perf->core_clk_rate); debugfs_create_u32("enable_bw_release", 0600, entry, (u32 *)&perf->enable_bw_release); - debugfs_create_u32("threshold_low", 0600, entry, + debugfs_create_u32("threshold_low", 0400, entry, (u32 *)&perf->perf_cfg->max_bw_low); - debugfs_create_u32("threshold_high", 0600, entry, + debugfs_create_u32("threshold_high", 0400, entry, (u32 *)&perf->perf_cfg->max_bw_high); - debugfs_create_u32("min_core_ib", 0600, entry, + debugfs_create_u32("min_core_ib", 0400, entry, (u32 *)&perf->perf_cfg->min_core_ib); - debugfs_create_u32("min_llcc_ib", 0600, entry, + debugfs_create_u32("min_llcc_ib", 0400, entry, (u32 *)&perf->perf_cfg->min_llcc_ib); - debugfs_create_u32("min_dram_ib", 0600, entry, + debugfs_create_u32("min_dram_ib", 0400, entry, (u32 *)&perf->perf_cfg->min_dram_ib); debugfs_create_file("perf_mode", 0600, entry, (u32 *)perf, &dpu_core_perf_mode_fops); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c index 946dd0135dff..6a0a74832fb6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c @@ -525,14 +525,14 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, int ret; if (!irq_cb) { - DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n", - DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb); + DPU_ERROR("IRQ=[%d, %d] NULL callback\n", + DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx)); return -EINVAL; } if (!dpu_core_irq_is_valid(irq_idx)) { - DPU_ERROR("invalid IRQ=[%d, %d]\n", - DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx)); + DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n", + DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb); return -EINVAL; } diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index c4cb82af5c2f..ffbfde922589 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -484,7 +484,7 @@ static void dp_display_handle_video_request(struct dp_display_private *dp) } } -static int dp_display_handle_port_ststus_changed(struct dp_display_private *dp) +static int dp_display_handle_port_status_changed(struct dp_display_private *dp) { int rc = 0; @@ -541,7 +541,7 @@ static int dp_display_usbpd_attention_cb(struct device *dev) drm_dbg_dp(dp->drm_dev, "hpd_state=%d sink_request=%d\n", dp->hpd_state, sink_request); if (sink_request & DS_PORT_STATUS_CHANGED) - rc = dp_display_handle_port_ststus_changed(dp); + rc = dp_display_handle_port_status_changed(dp); else rc = dp_display_handle_irq_hpd(dp); } @@ -588,6 +588,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) ret = dp_display_usbpd_configure_cb(&pdev->dev); if (ret) { /* link train failed */ dp->hpd_state = ST_DISCONNECTED; + pm_runtime_put_sync(&pdev->dev); } else { dp->hpd_state = ST_MAINLINK_READY; } @@ -645,6 +646,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) dp_display_host_phy_exit(dp); dp->hpd_state = ST_DISCONNECTED; dp_display_notify_disconnect(&dp->dp_display.pdev->dev); + pm_runtime_put_sync(&pdev->dev); mutex_unlock(&dp->event_mutex); return 0; } diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index e3f61c39df69..80166f702a0d 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -89,7 +89,7 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb, for (i = 0; i < n; i++) { ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &msm_fb->iova[i]); - drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)", + drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n", fb->base.id, i, msm_fb->iova[i], ret); if (ret) return ret; @@ -176,7 +176,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, const struct msm_format *format; int ret, i, n; - drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)", + drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)\n", mode_cmd, mode_cmd->width, mode_cmd->height, (char *)&mode_cmd->pixel_format); @@ -232,7 +232,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, refcount_set(&msm_fb->dirtyfb, 1); - drm_dbg_state(dev, "create: FB ID: %d (%p)", fb->base.id, fb); + drm_dbg_state(dev, "create: FB ID: %d (%p)\n", fb->base.id, fb); return fb; diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index 84c21ec2ceea..af6a6fcb1173 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c @@ -149,7 +149,7 @@ int msm_crtc_enable_vblank(struct drm_crtc *crtc) struct msm_kms *kms = priv->kms; if (!kms) return -ENXIO; - drm_dbg_vbl(dev, "crtc=%u", crtc->base.id); + drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id); return vblank_ctrl_queue_work(priv, crtc, true); } @@ -160,7 +160,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc) struct msm_kms *kms = priv->kms; if (!kms) return; - drm_dbg_vbl(dev, "crtc=%u", crtc->base.id); + drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id); vblank_ctrl_queue_work(priv, crtc, false); } diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 479effcf607e..79cfab53f80e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -23,6 +23,7 @@ */ #include "nouveau_drv.h" +#include "nouveau_bios.h" #include "nouveau_reg.h" #include "dispnv04/hw.h" #include "nouveau_encoder.h" @@ -1677,7 +1678,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf) */ if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) { if (*conn == 0xf2005014 && *conf == 0xffffffff) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B); return false; } } @@ -1763,26 +1764,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios) #ifdef __powerpc__ /* Apple iMac G4 NV17 */ if (of_machine_is_compatible("PowerMac4,5")) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1); - fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B); + fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C); return; } #endif /* Make up some sane defaults */ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, - bios->legacy.i2c_indices.crt, 1, 1); + bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B); if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0) fabricate_dcb_output(dcb, DCB_OUTPUT_TV, bios->legacy.i2c_indices.tv, - all_heads, 0); + all_heads, DCB_OUTPUT_A); else if (bios->tmds.output0_script_ptr || bios->tmds.output1_script_ptr) fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, bios->legacy.i2c_indices.panel, - all_heads, 1); + all_heads, DCB_OUTPUT_B); } static int diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 12feecf71e75..6fb65b01d778 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -378,9 +378,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) dma_addr_t *dma_addrs; struct nouveau_fence *fence; - src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL); - dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL); - dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL); + src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); + dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); + dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL); migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, npages); @@ -406,11 +406,11 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) migrate_device_pages(src_pfns, dst_pfns, npages); nouveau_dmem_fence_done(&fence); migrate_device_finalize(src_pfns, dst_pfns, npages); - kfree(src_pfns); - kfree(dst_pfns); + kvfree(src_pfns); + kvfree(dst_pfns); for (i = 0; i < npages; i++) dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); - kfree(dma_addrs); + kvfree(dma_addrs); } void diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index 7de7707ec6a8..a72c45809484 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -225,12 +225,18 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector, u8 *dpcd = nv_encoder->dp.dpcd; int ret = NOUVEAU_DP_NONE, hpd; - /* If we've already read the DPCD on an eDP device, we don't need to - * reread it as it won't change + /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we + * haven't probed them once before. */ - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP && - dpcd[DP_DPCD_REV] != 0) - return NOUVEAU_DP_SST; + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + if (connector->status == connector_status_connected) + return NOUVEAU_DP_SST; + else if (connector->status == connector_status_disconnected) + return NOUVEAU_DP_NONE; + } + + // Ensure that the aux bus is enabled for probing + drm_dp_dpcd_set_powered(&nv_connector->aux, true); mutex_lock(&nv_encoder->dp.hpd_irq_lock); if (mstm) { @@ -293,6 +299,13 @@ out: if (mstm && !mstm->suspended && ret != NOUVEAU_DP_MST) nv50_mstm_remove(mstm); + /* GSP doesn't like when we try to do aux transactions on a port it considers disconnected, + * and since we don't really have a usecase for that anyway - just disable the aux bus here + * if we've decided the connector is disconnected + */ + if (ret == NOUVEAU_DP_NONE) + drm_dp_dpcd_set_powered(&nv_connector->aux, false); + mutex_unlock(&nv_encoder->dp.hpd_irq_lock); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 0a0a11dc9ec0..ee02cd833c5e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -812,15 +812,15 @@ op_remap(struct drm_gpuva_op_remap *r, struct drm_gpuva_op_unmap *u = r->unmap; struct nouveau_uvma *uvma = uvma_from_va(u->va); u64 addr = uvma->va.va.addr; - u64 range = uvma->va.va.range; + u64 end = uvma->va.va.addr + uvma->va.va.range; if (r->prev) addr = r->prev->va.addr + r->prev->va.range; if (r->next) - range = r->next->va.addr - addr; + end = r->next->va.addr; - op_unmap_range(u, addr, range); + op_unmap_range(u, addr, end - addr); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 986e8d547c94..060c74a80eb1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -420,7 +420,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch, return ret; } else { ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, - &args, sizeof(args));; + &args, sizeof(args)); if (ret) return ret; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c index 4bf486b57101..cb05f7f48a98 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c @@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name) return ERR_PTR(-EINVAL); } +static void of_fini(void *p) +{ + kfree(p); +} + const struct nvbios_source nvbios_of = { .name = "OpenFirmware", .init = of_init, - .fini = (void(*)(void *))kfree, + .fini = of_fini, .read = of_read, .size = of_size, .rw = false, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c index 7bcbc4895ec2..271bfa038f5b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c @@ -25,6 +25,7 @@ #include #include +#include void gm107_devinit_disable(struct nvkm_devinit *init) @@ -33,10 +34,13 @@ gm107_devinit_disable(struct nvkm_devinit *init) u32 r021c00 = nvkm_rd32(device, 0x021c00); u32 r021c04 = nvkm_rd32(device, 0x021c04); - if (r021c00 & 0x00000001) - nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0); - if (r021c00 & 0x00000004) - nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2); + /* gsp only wants to enable/disable display */ + if (!nvkm_gsp_rm(device->gsp)) { + if (r021c00 & 0x00000001) + nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0); + if (r021c00 & 0x00000004) + nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2); + } if (r021c04 & 0x00000001) nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c index 11b4c9c274a1..666eb93b1742 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c @@ -41,6 +41,7 @@ r535_devinit_new(const struct nvkm_devinit_func *hw, rm->dtor = r535_devinit_dtor; rm->post = hw->post; + rm->disable = hw->disable; ret = nv50_devinit_new_(rm, device, type, inst, pdevinit); if (ret) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 9994cbd6f1c4..9858c1438aa7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1112,7 +1112,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) rpc->numEntries = NV_GSP_REG_NUM_ENTRIES; str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]); - strings = (char *)&rpc->entries[NV_GSP_REG_NUM_ENTRIES]; + strings = (char *)rpc + str_offset; for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) { int name_len = strlen(r535_registry_entries[i].name) + 1; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index a7f3fc342d87..dd5b5a17ece0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -222,8 +222,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory) void __iomem *map = NULL; /* Already mapped? */ - if (refcount_inc_not_zero(&iobj->maps)) + if (refcount_inc_not_zero(&iobj->maps)) { + /* read barrier match the wmb on refcount set */ + smp_rmb(); return iobj->map; + } /* Take the lock, and re-check that another thread hasn't * already mapped the object in the meantime. @@ -250,6 +253,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; + /* barrier to ensure the ptrs are written before refcount is set */ + smp_wmb(); refcount_set(&iobj->maps, 1); } diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36672e.c b/drivers/gpu/drm/panel/panel-novatek-nt36672e.c index cb7406d74466..c39fe0fc5d69 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt36672e.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt36672e.c @@ -614,8 +614,6 @@ static void nt36672e_panel_remove(struct mipi_dsi_device *dsi) struct nt36672e_panel *ctx = mipi_dsi_get_drvdata(dsi); mipi_dsi_detach(ctx->dsi); - mipi_dsi_device_unregister(ctx->dsi); - drm_panel_remove(&ctx->panel); } diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c index 775144695283..b15ca56a09a7 100644 --- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c +++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c @@ -253,8 +253,6 @@ static void visionox_rm69299_remove(struct mipi_dsi_device *dsi) struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi); mipi_dsi_detach(ctx->dsi); - mipi_dsi_device_unregister(ctx->dsi); - drm_panel_remove(&ctx->panel); } diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 9063ce254642..fd8e44992184 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -441,19 +441,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev) gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, - val, !val, 1, 1000); + val, !val, 1, 2000); if (ret) dev_err(pfdev->dev, "shader power transition timeout"); gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, - val, !val, 1, 1000); + val, !val, 1, 2000); if (ret) dev_err(pfdev->dev, "tiler power transition timeout"); gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, - val, !val, 0, 1000); + val, !val, 0, 2000); if (ret) dev_err(pfdev->dev, "l2 power transition timeout"); } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index f38385fe76bb..b91019cd5acb 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -502,11 +502,18 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, mapping_set_unevictable(mapping); for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) { + /* Can happen if the last fault only partially filled this + * section of the pages array before failing. In that case + * we skip already filled pages. + */ + if (pages[i]) + continue; + pages[i] = shmem_read_mapping_page(mapping, i); if (IS_ERR(pages[i])) { ret = PTR_ERR(pages[i]); pages[i] = NULL; - goto err_pages; + goto err_unlock; } } @@ -514,7 +521,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, ret = sg_alloc_table_from_pages(sgt, pages + page_offset, NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL); if (ret) - goto err_pages; + goto err_unlock; ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0); if (ret) @@ -537,8 +544,6 @@ out: err_map: sg_free_table(sgt); -err_pages: - drm_gem_shmem_put_pages(&bo->base); err_unlock: dma_resv_unlock(obj->resv); err_bo: diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 281edab518cd..d6ea01f3797b 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -421,7 +421,6 @@ int qxl_surface_id_alloc(struct qxl_device *qdev, { uint32_t handle; int idr_ret; - int count = 0; again: idr_preload(GFP_ATOMIC); spin_lock(&qdev->surf_id_idr_lock); @@ -433,7 +432,6 @@ again: handle = idr_ret; if (handle >= qdev->rom->n_surfaces) { - count++; spin_lock(&qdev->surf_id_idr_lock); idr_remove(&qdev->surf_id_idr, handle); spin_unlock(&qdev->surf_id_idr_lock); diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index dd0f834d881c..506ae1f5e099 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -145,7 +145,7 @@ static int qxl_process_single_command(struct qxl_device *qdev, struct qxl_release *release; struct qxl_bo *cmd_bo; void *fb_cmd; - int i, ret, num_relocs; + int i, ret; int unwritten; switch (cmd->type) { @@ -200,7 +200,6 @@ static int qxl_process_single_command(struct qxl_device *qdev, } /* fill out reloc info structs */ - num_relocs = 0; for (i = 0; i < cmd->relocs_num; ++i) { struct drm_qxl_reloc reloc; struct drm_qxl_reloc __user *u = u64_to_user_ptr(cmd->relocs); @@ -230,7 +229,6 @@ static int qxl_process_single_command(struct qxl_device *qdev, reloc_info[i].dst_bo = cmd_bo; reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset; } - num_relocs++; /* reserve and validate the reloc dst bo */ if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle) { diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 368d26da0d6a..9febc8b73f09 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr, signed long timeout) { struct qxl_device *qdev; + struct qxl_release *release; + int count = 0, sc = 0; + bool have_drawable_releases; unsigned long cur, end = jiffies + timeout; qdev = container_of(fence->lock, struct qxl_device, release_lock); + release = container_of(fence, struct qxl_release, base); + have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE; - if (!wait_event_timeout(qdev->release_event, - (dma_fence_is_signaled(fence) || - (qxl_io_notify_oom(qdev), 0)), - timeout)) - return 0; +retry: + sc++; + if (dma_fence_is_signaled(fence)) + goto signaled; + + qxl_io_notify_oom(qdev); + + for (count = 0; count < 11; count++) { + if (!qxl_queue_garbage_collect(qdev, true)) + break; + + if (dma_fence_is_signaled(fence)) + goto signaled; + } + + if (dma_fence_is_signaled(fence)) + goto signaled; + + if (have_drawable_releases || sc < 4) { + if (sc > 2) + /* back off */ + usleep_range(500, 1000); + + if (time_after(jiffies, end)) + return 0; + + if (have_drawable_releases && sc > 300) { + DMA_FENCE_WARN(fence, + "failed to wait on release %llu after spincount %d\n", + fence->context & ~0xf0000000, sc); + goto signaled; + } + goto retry; + } + /* + * yeah, original sync_obj_wait gave up after 3 spins when + * have_drawable_releases is not set. + */ + +signaled: cur = jiffies; if (time_after(cur, end)) return 0; diff --git a/drivers/gpu/drm/radeon/pptable.h b/drivers/gpu/drm/radeon/pptable.h index 94947229888b..b7f22597ee95 100644 --- a/drivers/gpu/drm/radeon/pptable.h +++ b/drivers/gpu/drm/radeon/pptable.h @@ -424,7 +424,7 @@ typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{ typedef struct _ATOM_PPLIB_STATE_V2 { //number of valid dpm levels in this state; Driver uses it to calculate the whole - //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR) + //size of the state: struct_size(ATOM_PPLIB_STATE_V2, clockInfoIndex, ucNumDPMLevels) UCHAR ucNumDPMLevels; //a index to the array of nonClockInfos @@ -432,14 +432,14 @@ typedef struct _ATOM_PPLIB_STATE_V2 /** * Driver will read the first ucNumDPMLevels in this array */ - UCHAR clockInfoIndex[1]; + UCHAR clockInfoIndex[] __counted_by(ucNumDPMLevels); } ATOM_PPLIB_STATE_V2; typedef struct _StateArray{ //how many states we have UCHAR ucNumEntries; - ATOM_PPLIB_STATE_V2 states[1]; + ATOM_PPLIB_STATE_V2 states[] __counted_by(ucNumEntries); }StateArray; @@ -450,7 +450,7 @@ typedef struct _ClockInfoArray{ //sizeof(ATOM_PPLIB_CLOCK_INFO) UCHAR ucEntrySize; - UCHAR clockInfo[1]; + UCHAR clockInfo[] __counted_by(ucNumEntries); }ClockInfoArray; typedef struct _NonClockInfoArray{ @@ -460,7 +460,7 @@ typedef struct _NonClockInfoArray{ //sizeof(ATOM_PPLIB_NONCLOCK_INFO) UCHAR ucEntrySize; - ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1]; + ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[] __counted_by(ucNumEntries); }NonClockInfoArray; typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index bb1f0a3371ab..10793a433bf5 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -923,8 +923,12 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct max_device = ATOM_MAX_SUPPORTED_DEVICE_INFO; for (i = 0; i < max_device; i++) { - ATOM_CONNECTOR_INFO_I2C ci = - supported_devices->info.asConnInfo[i]; + ATOM_CONNECTOR_INFO_I2C ci; + + if (frev > 1) + ci = supported_devices->info_2d1.asConnInfo[i]; + else + ci = supported_devices->info.asConnInfo[i]; bios_connectors[i].valid = false; diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 48170694ac6b..18efb3fe1c00 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -17,9 +17,7 @@ static const uint32_t formats_cluster[] = { DRM_FORMAT_XRGB2101010, - DRM_FORMAT_ARGB2101010, DRM_FORMAT_XBGR2101010, - DRM_FORMAT_ABGR2101010, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, DRM_FORMAT_XBGR8888, diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 3c4f5a392b06..58c8161289fe 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -71,13 +71,19 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; + /* + * It's perfectly valid to initialize an entity without having a valid + * scheduler attached. It's just not valid to use the scheduler before it + * is initialized itself. + */ entity->sched_list = num_sched_list > 1 ? sched_list : NULL; RCU_INIT_POINTER(entity->last_scheduled, NULL); RB_CLEAR_NODE(&entity->rb_tree_node); - if (!sched_list[0]->sched_rq) { - /* Warn drivers not to do this and to fix their DRM - * calling order. + if (num_sched_list && !sched_list[0]->sched_rq) { + /* Since every entry covered by num_sched_list + * should be non-NULL and therefore we warn drivers + * not to do this and to fix their DRM calling order. */ pr_warn("%s: called with uninitialized scheduler\n", __func__); } else if (num_sched_list) { diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 112438d965ff..6e1fd6985ffc 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool, enum ttm_caching caching, unsigned int order) { - if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) + if (pool->use_dma_alloc) return &pool->caching[caching].orders[order]; #ifdef CONFIG_X86 switch (caching) { case ttm_write_combined: + if (pool->nid != NUMA_NO_NODE) + return &pool->caching[caching].orders[order]; + if (pool->use_dma32) return &global_dma32_write_combined[order]; return &global_write_combined[order]; case ttm_uncached: + if (pool->nid != NUMA_NO_NODE) + return &pool->caching[caching].orders[order]; + if (pool->use_dma32) return &global_dma32_uncached[order]; @@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, pool->use_dma_alloc = use_dma_alloc; pool->use_dma32 = use_dma32; - if (use_dma_alloc || nid != NUMA_NO_NODE) { - for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < NR_PAGE_ORDERS; ++j) - ttm_pool_type_init(&pool->caching[i].orders[j], - pool, i, j); + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) { + for (j = 0; j < NR_PAGE_ORDERS; ++j) { + struct ttm_pool_type *pt; + + /* Initialize only pool types which are actually used */ + pt = ttm_pool_select_type(pool, i, j); + if (pt != &pool->caching[i].orders[j]) + continue; + + ttm_pool_type_init(pt, pool, i, j); + } } } EXPORT_SYMBOL(ttm_pool_init); @@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool) { unsigned int i, j; - if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) { - for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < NR_PAGE_ORDERS; ++j) - ttm_pool_type_fini(&pool->caching[i].orders[j]); + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) { + for (j = 0; j < NR_PAGE_ORDERS; ++j) { + struct ttm_pool_type *pt; + + pt = ttm_pool_select_type(pool, i, j); + if (pt != &pool->caching[i].orders[j]) + continue; + + ttm_pool_type_fini(pt); + } } /* We removed the pool types from the LRU, but we need to also make sure diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 2e04f6cb661e..ce6b2fb341d1 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -105,7 +105,6 @@ v3d_irq(int irq, void *arg) struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv; u64 runtime = local_clock() - file->start_ns[V3D_BIN]; - file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN]; file->jobs_sent[V3D_BIN]++; v3d->queue[V3D_BIN].jobs_sent++; @@ -126,7 +125,6 @@ v3d_irq(int irq, void *arg) struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv; u64 runtime = local_clock() - file->start_ns[V3D_RENDER]; - file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER]; file->jobs_sent[V3D_RENDER]++; v3d->queue[V3D_RENDER].jobs_sent++; @@ -147,7 +145,6 @@ v3d_irq(int irq, void *arg) struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv; u64 runtime = local_clock() - file->start_ns[V3D_CSD]; - file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD]; file->jobs_sent[V3D_CSD]++; v3d->queue[V3D_CSD].jobs_sent++; @@ -195,7 +192,6 @@ v3d_hub_irq(int irq, void *arg) struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv; u64 runtime = local_clock() - file->start_ns[V3D_TFU]; - file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU]; file->jobs_sent[V3D_TFU]++; v3d->queue[V3D_TFU].jobs_sent++; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c index c52c7bf1485b..717d624e9a05 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c @@ -456,8 +456,10 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, .no_wait_gpu = false }; u32 j, initial_line = dst_offset / dst_stride; - struct vmw_bo_blit_line_data d; + struct vmw_bo_blit_line_data d = {0}; int ret = 0; + struct page **dst_pages = NULL; + struct page **src_pages = NULL; /* Buffer objects need to be either pinned or reserved: */ if (!(dst->pin_count)) @@ -477,12 +479,35 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, return ret; } + if (!src->ttm->pages && src->ttm->sg) { + src_pages = kvmalloc_array(src->ttm->num_pages, + sizeof(struct page *), GFP_KERNEL); + if (!src_pages) + return -ENOMEM; + ret = drm_prime_sg_to_page_array(src->ttm->sg, src_pages, + src->ttm->num_pages); + if (ret) + goto out; + } + if (!dst->ttm->pages && dst->ttm->sg) { + dst_pages = kvmalloc_array(dst->ttm->num_pages, + sizeof(struct page *), GFP_KERNEL); + if (!dst_pages) { + ret = -ENOMEM; + goto out; + } + ret = drm_prime_sg_to_page_array(dst->ttm->sg, dst_pages, + dst->ttm->num_pages); + if (ret) + goto out; + } + d.mapped_dst = 0; d.mapped_src = 0; d.dst_addr = NULL; d.src_addr = NULL; - d.dst_pages = dst->ttm->pages; - d.src_pages = src->ttm->pages; + d.dst_pages = dst->ttm->pages ? dst->ttm->pages : dst_pages; + d.src_pages = src->ttm->pages ? src->ttm->pages : src_pages; d.dst_num_pages = PFN_UP(dst->resource->size); d.src_num_pages = PFN_UP(src->resource->size); d.dst_prot = ttm_io_prot(dst, dst->resource, PAGE_KERNEL); @@ -504,6 +529,10 @@ out: kunmap_atomic(d.src_addr); if (d.dst_addr) kunmap_atomic(d.dst_addr); + if (src_pages) + kvfree(src_pages); + if (dst_pages) + kvfree(dst_pages); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index bfd41ce3c8f4..e5eb21a471a6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -377,7 +377,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv, { struct ttm_operation_ctx ctx = { .interruptible = params->bo_type != ttm_bo_type_kernel, - .no_wait_gpu = false + .no_wait_gpu = false, + .resv = params->resv, }; struct ttm_device *bdev = &dev_priv->bdev; struct drm_device *vdev = &dev_priv->drm; @@ -394,8 +395,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv, vmw_bo_placement_set(vmw_bo, params->domain, params->busy_domain); ret = ttm_bo_init_reserved(bdev, &vmw_bo->tbo, params->bo_type, - &vmw_bo->placement, 0, &ctx, NULL, - NULL, destroy); + &vmw_bo->placement, 0, &ctx, + params->sg, params->resv, destroy); if (unlikely(ret)) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 0d496dc9c6af..f349642e6190 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -55,6 +55,8 @@ struct vmw_bo_params { enum ttm_bo_type bo_type; size_t size; bool pin; + struct dma_resv *resv; + struct sg_table *sg; }; /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index d3e308fdfd5b..58fb40c93100 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -666,11 +666,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; - /* TTM currently doesn't fully support SEV encryption. */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return -EINVAL; - - if (vmw_force_coherent) + /* + * When running with SEV we always want dma mappings, because + * otherwise ttm tt pool pages will bounce through swiotlb running + * out of available space. + */ + if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT)) dev_priv->map_mode = vmw_dma_alloc_coherent; else if (vmw_restrict_iommu) dev_priv->map_mode = vmw_dma_map_bind; @@ -1444,12 +1445,15 @@ static void vmw_debugfs_resource_managers_init(struct vmw_private *vmw) root, "system_ttm"); ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM), root, "vram_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), - root, "gmr_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), - root, "mob_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), - root, "system_mob_ttm"); + if (vmw->has_gmr) + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), + root, "gmr_ttm"); + if (vmw->has_mob) { + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), + root, "mob_ttm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), + root, "system_mob_ttm"); + } } static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, @@ -1624,6 +1628,7 @@ static const struct drm_driver driver = { .prime_fd_to_handle = vmw_prime_fd_to_handle, .prime_handle_to_fd = vmw_prime_handle_to_fd, + .gem_prime_import_sg_table = vmw_prime_import_sg_table, .fops = &vmwgfx_driver_fops, .name = VMWGFX_DRIVER_NAME, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 12efecc17df6..b019a1a1787a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1130,6 +1130,9 @@ extern int vmw_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd); +struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *table); /* * MemoryOBject management - vmwgfx_mob.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 12787bb9c111..d6bcaf078b1f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -149,6 +149,38 @@ out_no_bo: return ret; } +struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *table) +{ + int ret; + struct vmw_private *dev_priv = vmw_priv(dev); + struct drm_gem_object *gem = NULL; + struct vmw_bo *vbo; + struct vmw_bo_params params = { + .domain = (dev_priv->has_mob) ? VMW_BO_DOMAIN_SYS : VMW_BO_DOMAIN_VRAM, + .busy_domain = VMW_BO_DOMAIN_SYS, + .bo_type = ttm_bo_type_sg, + .size = attach->dmabuf->size, + .pin = false, + .resv = attach->dmabuf->resv, + .sg = table, + + }; + + dma_resv_lock(params.resv, NULL); + + ret = vmw_bo_create(dev_priv, ¶ms, &vbo); + if (ret != 0) + goto out_no_bo; + + vbo->tbo.base.funcs = &vmw_gem_object_funcs; + + gem = &vbo->tbo.base; +out_no_bo: + dma_resv_unlock(params.resv); + return gem; +} int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index cd4925346ed4..84ae4e10a2eb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -933,6 +933,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct vmw_private *vmw = vmw_priv(crtc->dev); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc); @@ -940,9 +941,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, bool has_primary = new_state->plane_mask & drm_plane_mask(crtc->primary); - /* We always want to have an active plane with an active CRTC */ - if (has_primary != new_state->enable) - return -EINVAL; + /* + * This is fine in general, but broken userspace might expect + * some actual rendering so give a clue as why it's blank. + */ + if (new_state->enable && !has_primary) + drm_dbg_driver(&vmw->drm, + "CRTC without a primary plane will be blank.\n"); if (new_state->connector_mask != connector_mask && diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index a94947b588e8..19a843da87b7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -243,10 +243,10 @@ struct vmw_framebuffer_bo { static const uint32_t __maybe_unused vmw_primary_plane_formats[] = { - DRM_FORMAT_XRGB1555, - DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB1555, }; static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c index 2d72a5ee7c0c..c99cad444991 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c @@ -75,8 +75,12 @@ int vmw_prime_fd_to_handle(struct drm_device *dev, int fd, u32 *handle) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + int ret = ttm_prime_fd_to_handle(tfile, fd, handle); - return ttm_prime_fd_to_handle(tfile, fd, handle); + if (ret) + ret = drm_gem_prime_fd_to_handle(dev, file_priv, fd, handle); + + return ret; } int vmw_prime_handle_to_fd(struct drm_device *dev, @@ -85,5 +89,12 @@ int vmw_prime_handle_to_fd(struct drm_device *dev, int *prime_fd) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); + int ret; + + if (handle > VMWGFX_NUM_MOB) + ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); + else + ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); + + return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 4d23d0a70bcb..621d98b376bb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -188,13 +188,18 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) switch (dev_priv->map_mode) { case vmw_dma_map_bind: case vmw_dma_map_populate: - vsgt->sgt = &vmw_tt->sgt; - ret = sg_alloc_table_from_pages_segment( - &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0, - (unsigned long)vsgt->num_pages << PAGE_SHIFT, - dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL); - if (ret) - goto out_sg_alloc_fail; + if (vmw_tt->dma_ttm.page_flags & TTM_TT_FLAG_EXTERNAL) { + vsgt->sgt = vmw_tt->dma_ttm.sg; + } else { + vsgt->sgt = &vmw_tt->sgt; + ret = sg_alloc_table_from_pages_segment(&vmw_tt->sgt, + vsgt->pages, vsgt->num_pages, 0, + (unsigned long)vsgt->num_pages << PAGE_SHIFT, + dma_get_max_seg_size(dev_priv->drm.dev), + GFP_KERNEL); + if (ret) + goto out_sg_alloc_fail; + } ret = vmw_ttm_map_for_dma(vmw_tt); if (unlikely(ret != 0)) @@ -209,8 +214,9 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) return 0; out_map_fail: - sg_free_table(vmw_tt->vsgt.sgt); - vmw_tt->vsgt.sgt = NULL; + drm_warn(&dev_priv->drm, "VSG table map failed!"); + sg_free_table(vsgt->sgt); + vsgt->sgt = NULL; out_sg_alloc_fail: return ret; } @@ -356,15 +362,17 @@ static void vmw_ttm_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) static int vmw_ttm_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) { - int ret; + bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; - /* TODO: maybe completely drop this ? */ if (ttm_tt_is_populated(ttm)) return 0; - ret = ttm_pool_alloc(&bdev->pool, ttm, ctx); + if (external && ttm->sg) + return drm_prime_sg_to_dma_addr_array(ttm->sg, + ttm->dma_address, + ttm->num_pages); - return ret; + return ttm_pool_alloc(&bdev->pool, ttm, ctx); } static void vmw_ttm_unpopulate(struct ttm_device *bdev, @@ -372,6 +380,10 @@ static void vmw_ttm_unpopulate(struct ttm_device *bdev, { struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt, dma_ttm); + bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; + + if (external) + return; vmw_ttm_unbind(bdev, ttm); @@ -390,6 +402,7 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo, { struct vmw_ttm_tt *vmw_be; int ret; + bool external = bo->type == ttm_bo_type_sg; vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL); if (!vmw_be) @@ -398,7 +411,10 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo, vmw_be->dev_priv = vmw_priv_from_ttm(bo->bdev); vmw_be->mob = NULL; - if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent) + if (external) + page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; + + if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent || external) ret = ttm_sg_tt_init(&vmw_be->dma_ttm, bo, page_flags, ttm_cached); else diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 5a428ca00f10..c29a850859ad 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -172,8 +172,8 @@ subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ -Ddrm_i915_gem_object=xe_bo \ -Ddrm_i915_private=xe_device -CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init) -CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init) +CFLAGS_i915-display/intel_fbdev.o = -Wno-override-init +CFLAGS_i915-display/intel_display_device.o = -Wno-override-init # Rule to build SOC code shared with i915 $(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index b21da7b745a5..a9c1f9885c6b 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -31,7 +31,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, ret = ttm_bo_reserve(&bo->ttm, true, false, NULL); if (ret) - return ret; + goto err; if (!(bo->flags & XE_BO_SCANOUT_BIT)) { /* @@ -42,12 +42,16 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, */ if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) { ttm_bo_unreserve(&bo->ttm); - return -EINVAL; + ret = -EINVAL; + goto err; } bo->flags |= XE_BO_SCANOUT_BIT; } ttm_bo_unreserve(&bo->ttm); + return 0; +err: + xe_bo_put(bo); return ret; } diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index e4db069f0db3..6ec375c1c4b6 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -108,11 +108,6 @@ int xe_display_create(struct xe_device *xe) xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); drmm_mutex_init(&xe->drm, &xe->sb_lock); - drmm_mutex_init(&xe->drm, &xe->display.backlight.lock); - drmm_mutex_init(&xe->drm, &xe->display.audio.mutex); - drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex); - drmm_mutex_init(&xe->drm, &xe->display.pps.mutex); - drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex); xe->enabled_irq_mask = ~0; err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 0b1266c88a6a..deddc8be48c0 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -125,7 +125,7 @@ #define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234) #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) -#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244) +#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 6603a0ea79c5..9c0837b6fdfc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -144,9 +144,6 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, .mem_type = XE_PL_TT, }; *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = XE_PL_TT; } } @@ -181,25 +178,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, } places[*c] = place; *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = mem_type; } static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo->props.preferred_gt == XE_GT1) { - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - } else { - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); - } + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, @@ -223,17 +210,8 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, { u32 c = 0; - bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; - - /* The order of placements should indicate preferred location */ - - if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) { - try_add_system(xe, bo, bo_flags, &c); - try_add_vram(xe, bo, bo_flags, &c); - } else { - try_add_vram(xe, bo, bo_flags, &c); - try_add_system(xe, bo, bo_flags, &c); - } + try_add_vram(xe, bo, bo_flags, &c); + try_add_system(xe, bo, bo_flags, &c); try_add_stolen(xe, bo, bo_flags, &c); if (!c) @@ -1126,13 +1104,6 @@ static void xe_gem_object_close(struct drm_gem_object *obj, } } -static bool should_migrate_to_system(struct xe_bo *bo) -{ - struct xe_device *xe = xe_bo_device(bo); - - return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic; -} - static vm_fault_t xe_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; @@ -1141,7 +1112,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) struct xe_bo *bo = ttm_to_xe_bo(tbo); bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK; vm_fault_t ret; - int idx, r = 0; + int idx; if (needs_rpm) xe_device_mem_access_get(xe); @@ -1153,17 +1124,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) if (drm_dev_enter(ddev, &idx)) { trace_xe_bo_cpu_fault(bo); - if (should_migrate_to_system(bo)) { - r = xe_bo_migrate(bo, XE_PL_TT); - if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) - ret = VM_FAULT_NOPAGE; - else if (r) - ret = VM_FAULT_SIGBUS; - } - if (!ret) - ret = ttm_bo_vm_fault_reserved(vmf, - vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); drm_dev_exit(idx); } else { ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); @@ -1291,9 +1253,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; - bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; - bo->props.preferred_gt = XE_BO_PROPS_INVALID; - bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; bo->ttm.priority = XE_BO_PRIORITY_NORMAL; INIT_LIST_HEAD(&bo->pinned_link); #ifdef CONFIG_PROC_FS diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 14ef13b7b421..86422e113d39 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -56,25 +56,6 @@ struct xe_bo { */ struct list_head client_link; #endif - /** @props: BO user controlled properties */ - struct { - /** @preferred_mem: preferred memory class for this BO */ - s16 preferred_mem_class; - /** @prefered_gt: preferred GT for this BO */ - s16 preferred_gt; - /** @preferred_mem_type: preferred memory type */ - s32 preferred_mem_type; - /** - * @cpu_atomic: the CPU expects to do atomics operations to - * this BO - */ - bool cpu_atomic; - /** - * @device_atomic: the device expects to do atomics operations - * to this BO - */ - bool device_atomic; - } props; /** @freed: List node for delayed put. */ struct llist_node freed; /** @created: Whether the bo has passed initial creation */ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ca85e81fdb44..d32ff3857e65 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); + if (xe->preempt_fence_wq) + destroy_workqueue(xe->preempt_fence_wq); + if (xe->ordered_wq) destroy_workqueue(xe->ordered_wq); @@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, INIT_LIST_HEAD(&xe->pinned.external_vram); INIT_LIST_HEAD(&xe->pinned.evicted); + xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); - if (!xe->ordered_wq || !xe->unordered_wq) { + if (!xe->ordered_wq || !xe->unordered_wq || + !xe->preempt_fence_wq) { + /* + * Cleanup done in xe_device_destroy via + * drmm_add_action_or_reset register above + */ drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); err = -ENOMEM; goto err; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 14be34d9f543..d413bc2c6be5 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -58,7 +58,7 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) { - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE)) + if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) gt_id = 0; return gt_id ? tile->media_gt : tile->primary_gt; @@ -79,7 +79,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) if (MEDIA_VER(xe) >= 13) { gt = xe_tile_get_gt(root_tile, gt_id); } else { - if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE)) + if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) gt_id = 0; gt = xe->tiles[gt_id].primary_gt; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9785eef2e5a4..8e3a222b41cf 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -363,6 +363,9 @@ struct xe_device { /** @ufence_wq: user fence wait queue */ wait_queue_head_t ufence_wq; + /** @preempt_fence_wq: used to serialize preempt fences */ + struct workqueue_struct *preempt_fence_wq; + /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 826c8b389672..cc5e0f75de3c 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -94,48 +94,16 @@ * Unlock all */ +/* + * Add validation and rebinding to the drm_exec locking loop, since both can + * trigger eviction which may require sleeping dma_resv locks. + */ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); - struct drm_gem_object *obj; - unsigned long index; - int num_fences; - int ret; - ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); - if (ret) - return ret; - - /* - * 1 fence slot for the final submit, and 1 more for every per-tile for - * GPU bind and 1 extra for CPU bind. Note that there are potentially - * many vma per object/dma-resv, however the fence slot will just be - * re-used, since they are largely the same timeline and the seqno - * should be in order. In the case of CPU bind there is dummy fence used - * for all CPU binds, so no need to have a per-tile slot for that. - */ - num_fences = 1 + 1 + vm->xe->info.tile_count; - - /* - * We don't know upfront exactly how many fence slots we will need at - * the start of the exec, since the TTM bo_validate above can consume - * numerous fence slots. Also due to how the dma_resv_reserve_fences() - * works it only ensures that at least that many fence slots are - * available i.e if there are already 10 slots available and we reserve - * two more, it can just noop without reserving anything. With this it - * is quite possible that TTM steals some of the fence slots and then - * when it comes time to do the vma binding and final exec stage we are - * lacking enough fence slots, leading to some nasty BUG_ON() when - * adding the fences. Hence just add our own fences here, after the - * validate stage. - */ - drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) { - ret = dma_resv_reserve_fences(obj->resv, num_fences); - if (ret) - return ret; - } - - return 0; + /* The fence slot added here is intended for the exec sched job. */ + return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_exec *exec = &vm_exec.exec; u32 i, num_syncs = 0, num_ufence = 0; struct xe_sched_job *job; - struct dma_fence *rebind_fence; struct xe_vm *vm; bool write_locked, skip_retry = false; ktime_t end = 0; @@ -290,39 +257,7 @@ retry: goto err_exec; } - /* - * Rebind any invalidated userptr or evicted BOs in the VM, non-compute - * VM mode only. - */ - rebind_fence = xe_vm_rebind(vm, false); - if (IS_ERR(rebind_fence)) { - err = PTR_ERR(rebind_fence); - goto err_put_job; - } - - /* - * We store the rebind_fence in the VM so subsequent execs don't get - * scheduled before the rebinds of userptrs / evicted BOs is complete. - */ - if (rebind_fence) { - dma_fence_put(vm->rebind_fence); - vm->rebind_fence = rebind_fence; - } - if (vm->rebind_fence) { - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &vm->rebind_fence->flags)) { - dma_fence_put(vm->rebind_fence); - vm->rebind_fence = NULL; - } else { - dma_fence_get(vm->rebind_fence); - err = drm_sched_job_add_dependency(&job->drm, - vm->rebind_fence); - if (err) - goto err_put_job; - } - } - - /* Wait behind munmap style rebinds */ + /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { err = drm_sched_job_add_resv_dependencies(&job->drm, xe_vm_resv(vm), diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 11e150f4c0c1..ead25d5e723e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -448,7 +448,7 @@ find_hw_engine(struct xe_device *xe, { u32 idx; - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; if (eci.gt_id >= xe->info.gt_count) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 62b3d9d1d7cd..462b33195032 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -148,6 +148,11 @@ struct xe_exec_queue { const struct xe_ring_ops *ring_ops; /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ struct drm_sched_entity *entity; + /** + * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed + * Protected by @vm's resv. Unused if @vm == NULL. + */ + u64 tlb_flush_seqno; /** @lrc: logical ring context for this exec queue */ struct xe_lrc lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 241c294270d9..fa9e9853c53b 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); - unsigned int num_shared = 2; /* slots for bind + move */ int err; - err = xe_vm_prepare_vma(exec, vma, num_shared); + err = xe_vm_lock_vma(exec, vma); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index f03e077f81a0..e598a4363d01 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -61,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); spin_lock_init(>->tlb_invalidation.pending_lock); spin_lock_init(>->tlb_invalidation.lock); - gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, xe_gt_tlb_fence_timeout); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 70c615dd1498..07b2f724ec45 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -177,13 +177,6 @@ struct xe_gt { * xe_gt_tlb_fence_timeout after the timeut interval is over. */ struct delayed_work fence_tdr; - /** @tlb_invalidation.fence_context: context for TLB invalidation fences */ - u64 fence_context; - /** - * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by - * tlb_invalidation.lock - */ - u32 fence_seqno; /** @tlb_invalidation.lock: protects TLB invalidation fences */ spinlock_t lock; } tlb_invalidation; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ff77bc8da1b2..e2a4c3b5e9ff 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1220,7 +1220,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) init_waitqueue_head(&ge->suspend_wait); timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : - q->sched_props.job_timeout_ms; + msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, get_submit_wq(guc), q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index b82233a41606..9ac7fbe201b3 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -290,7 +290,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a * As y can be < 2, we compute tau4 = (4 | x) << y * and then add 2 when doing the final right shift to account for units */ - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; /* val in hwmon interface units (millisec) */ out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); @@ -330,7 +330,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute * r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); x = REG_FIELD_GET(PKG_MAX_WIN_X, r); y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); if (val > max_win) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 7ad853b0788a..57066faf575e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -97,7 +97,6 @@ static void set_offsets(u32 *regs, #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END 0 { const u32 base = hwe->mmio_base; @@ -168,7 +167,7 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END + 0 }; static const u8 dg2_xcs_offsets[] = { @@ -202,7 +201,7 @@ static const u8 dg2_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END + 0 }; static const u8 gen12_rcs_offsets[] = { @@ -298,7 +297,7 @@ static const u8 gen12_rcs_offsets[] = { REG(0x084), NOP(1), - END + 0 }; static const u8 xehp_rcs_offsets[] = { @@ -339,7 +338,7 @@ static const u8 xehp_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; static const u8 dg2_rcs_offsets[] = { @@ -382,7 +381,7 @@ static const u8 dg2_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; static const u8 mtl_rcs_offsets[] = { @@ -425,7 +424,7 @@ static const u8 mtl_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; #define XE2_CTX_COMMON \ @@ -471,7 +470,7 @@ static const u8 xe2_rcs_offsets[] = { LRI(1, 0), /* [0x47] */ REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ - END + 0 }; static const u8 xe2_bcs_offsets[] = { @@ -482,16 +481,15 @@ static const u8 xe2_bcs_offsets[] = { REG16(0x200), /* [0x42] BCS_SWCTRL */ REG16(0x204), /* [0x44] BLIT_CCTL */ - END + 0 }; static const u8 xe2_xcs_offsets[] = { XE2_CTX_COMMON, - END + 0 }; -#undef END #undef REG16 #undef REG #undef LRI @@ -527,9 +525,8 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) { - regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) | - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); /* TODO: Timestamp */ } diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ee1bb938c493..2ba4fb9511f6 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -227,7 +227,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (vm->flags & XE_VM_FLAG_64K && level == 1) flags = XE_PDE_64K; - entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) * + entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) * XE_PAGE_SIZE, pat_index); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); @@ -235,7 +235,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Write PDE's that point to our BO. */ for (i = 0; i < num_entries - num_level; i++) { - entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE, + entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE, pat_index); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + @@ -291,7 +291,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, #define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) #define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) drm_suballoc_manager_init(&m->vm_update_sa, - (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * + (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * NUM_VMUSA_UNIT_PER_PAGE, 0); m->pt_bo = bo; @@ -490,7 +490,7 @@ static void emit_pte(struct xe_migrate *m, struct xe_vm *vm = m->q->vm; u16 pat_index; u32 ptes; - u64 ofs = at_pt * XE_PAGE_SIZE; + u64 ofs = (u64)at_pt * XE_PAGE_SIZE; u64 cur_ofs; /* Indirect access needs compression enabled uncached PAT index */ diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7bce2a332603..7d50c6e89d8e 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence) struct xe_exec_queue *q = pfence->q; pfence->error = q->ops->suspend(q); - queue_work(system_unbound_wq, &pfence->preempt_work); + queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work); return true; } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 7f54bc3e389d..4efc8c1a3d7a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt, spin_lock_irq(>->tlb_invalidation.lock); dma_fence_init(&ifence->base.base, &invalidation_fence_ops, >->tlb_invalidation.lock, - gt->tlb_invalidation.fence_context, - ++gt->tlb_invalidation.fence_seqno); + dma_fence_context_alloc(1), 1); spin_unlock_irq(>->tlb_invalidation.lock); INIT_LIST_HEAD(&ifence->base.link); @@ -1236,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); if (err) goto err; + + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + goto err; + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); @@ -1254,11 +1260,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue * non-faulting LR, in particular on user-space batch buffer chaining, * it needs to be done here. */ - if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || - (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { + if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); + } else if (rebind && !xe_vm_in_lr_mode(vm)) { + /* We bump also if batch_invalidate_tlb is true */ + vm->tlb_flush_seqno++; } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); @@ -1297,7 +1305,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue } /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind && + dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || last_munmap_rebind ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); @@ -1576,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu struct dma_fence *fence = NULL; struct invalidation_fence *ifence; struct xe_range_fence *rfence; + int err; LLIST_HEAD(deferred); @@ -1593,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, num_entries); + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + return ERR_PTR(err); + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 92bb06c0586e..075f9eaef031 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -132,7 +132,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id > XE_MAX_GT_PER_TILE) + if (eci->gt_id >= XE_MAX_GT_PER_TILE) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index c4edffcd4a32..5b2b37b59813 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - struct xe_vm *vm = job->q->vm; struct xe_gt *gt = job->q->gt; - if (vm && vm->batch_invalidate_tlb) { + if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); @@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; - struct xe_vm *vm = job->q->vm; dw[i++] = preparser_disable(true); @@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); } - if (vm && vm->batch_invalidate_tlb) + if (job->ring_ops_flush_tlb) i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); dw[i++] = preparser_disable(false); - if (!vm || !vm->batch_invalidate_tlb) + if (!job->ring_ops_flush_tlb) i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); @@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); - struct xe_vm *vm = job->q->vm; u32 mask_flags = 0; dw[i++] = preparser_disable(true); @@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */ - i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i); + i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i); /* hsdes: 1809175790 */ if (has_aux_ccs(xe)) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 8151ddafb940..b0c7fa4693cf 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job) void xe_sched_job_arm(struct xe_sched_job *job) { + struct xe_exec_queue *q = job->q; + struct xe_vm *vm = q->vm; + + if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) && + (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) { + xe_vm_assert_held(vm); + q->tlb_flush_seqno = vm->tlb_flush_seqno; + job->ring_ops_flush_tlb = true; + } + drm_sched_job_arm(&job->drm); } diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index b1d83da50a53..5e12724219fd 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -39,6 +39,8 @@ struct xe_sched_job { } user_fence; /** @migrate_flush_flags: Additional flush flags for migration jobs */ u32 migrate_flush_flags; + /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ + bool ring_ops_flush_tlb; /** @batch_addr: batch buffer address of job */ u64 batch_addr[]; }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f88faef4142b..3d4c8f342e21 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) return 0; } +/** + * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas + * @vm: The vm for which we are rebinding. + * @exec: The struct drm_exec with the locked GEM objects. + * @num_fences: The number of fences to reserve for the operation, not + * including rebinds and validations. + * + * Validates all evicted gem objects and rebinds their vmas. Note that + * rebindings may cause evictions and hence the validation-rebind + * sequence is rerun until there are no more objects to validate. + * + * Return: 0 on success, negative error code on error. In particular, + * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if + * the drm_exec transaction needs to be restarted. + */ +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gem_object *obj; + unsigned long index; + int ret; + + do { + ret = drm_gpuvm_validate(&vm->gpuvm, exec); + if (ret) + return ret; + + ret = xe_vm_rebind(vm, false); + if (ret) + return ret; + } while (!list_empty(&vm->gpuvm.evict.list)); + + drm_exec_for_each_locked_object(exec, index, obj) { + ret = dma_resv_reserve_fences(obj->resv, num_fences); + if (ret) + return ret; + } + + return 0; +} + static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, bool *done) { int err; - /* - * 1 fence for each preempt fence plus a fence for each tile from a - * possible rebind - */ - err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues + - vm->xe->info.tile_count); + err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); if (err) return err; @@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return 0; } - err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues); + err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); if (err) return err; @@ -515,14 +551,19 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, if (err) return err; - return drm_gpuvm_validate(&vm->gpuvm, exec); + /* + * Add validation and rebinding to the locking loop since both can + * cause evictions which may require blocing dma_resv locks. + * The fence reservation here is intended for the new preempt fences + * we attach at the end of the rebind work. + */ + return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); struct drm_exec exec; - struct dma_fence *rebind_fence; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); ktime_t end = 0; @@ -568,18 +609,11 @@ retry: if (err) goto out_unlock; - rebind_fence = xe_vm_rebind(vm, true); - if (IS_ERR(rebind_fence)) { - err = PTR_ERR(rebind_fence); + err = xe_vm_rebind(vm, true); + if (err) goto out_unlock; - } - if (rebind_fence) { - dma_fence_wait(rebind_fence, false); - dma_fence_put(rebind_fence); - } - - /* Wait on munmap style VM unbinds */ + /* Wait on rebinds and munmap style VM unbinds */ wait = dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT); @@ -773,14 +807,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op); -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { - struct dma_fence *fence = NULL; + struct dma_fence *fence; struct xe_vma *vma, *next; lockdep_assert_held(&vm->lock); if (xe_vm_in_lr_mode(vm) && !rebind_worker) - return NULL; + return 0; xe_vm_assert_held(vm); list_for_each_entry_safe(vma, next, &vm->rebind_list, @@ -788,17 +822,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) xe_assert(vm->xe, vma->tile_present); list_del_init(&vma->combined_links.rebind); - dma_fence_put(fence); if (rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); if (IS_ERR(fence)) - return fence; + return PTR_ERR(fence); + dma_fence_put(fence); } - return fence; + return 0; } static void xe_vma_free(struct xe_vma *vma) @@ -1004,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) } /** - * xe_vm_prepare_vma() - drm_exec utility to lock a vma + * xe_vm_lock_vma() - drm_exec utility to lock a vma * @exec: The drm_exec object we're currently locking for. * @vma: The vma for witch we want to lock the vm resv and any attached * object's resv. - * @num_shared: The number of dma-fence slots to pre-allocate in the - * objects' reservation objects. * * Return: 0 on success, negative error code on error. In particular * may return -EDEADLK on WW transaction contention and -EINTR if * an interruptible wait is terminated by a signal. */ -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared) +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); struct xe_bo *bo = xe_vma_bo(vma); int err; XE_WARN_ON(!vm); - if (num_shared) - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - else - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (!err && bo && !bo->vm) { - if (num_shared) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); - else - err = drm_exec_lock_obj(exec, &bo->ttm.base); - } + + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (!err && bo && !bo->vm) + err = drm_exec_lock_obj(exec, &bo->ttm.base); return err; } @@ -1044,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma) drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - err = xe_vm_prepare_vma(&exec, vma, 0); + err = xe_vm_lock_vma(&exec, vma); drm_exec_retry_on_contention(&exec); if (XE_WARN_ON(err)) break; @@ -1552,6 +1577,16 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe->usm.num_vm_in_fault_mode--; else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) xe->usm.num_vm_in_non_fault_mode--; + + if (vm->usm.asid) { + void *lookup; + + xe_assert(xe, xe->info.has_asid); + xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); + + lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); + xe_assert(xe, lookup == vm); + } mutex_unlock(&xe->usm.lock); for_each_tile(tile, xe, id) @@ -1567,29 +1602,19 @@ static void vm_destroy_work_func(struct work_struct *w) struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; - void *lookup; /* xe_vm_close_and_put was not called? */ xe_assert(xe, !vm->size); mutex_destroy(&vm->snap_mutex); - if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { + if (!(vm->flags & XE_VM_FLAG_MIGRATION)) xe_device_mem_access_put(xe); - if (xe->info.has_asid && vm->usm.asid) { - mutex_lock(&xe->usm.lock); - lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); - xe_assert(xe, lookup == vm); - mutex_unlock(&xe->usm.lock); - } - } - for_each_tile(tile, xe, id) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); - dma_fence_put(vm->rebind_fence); kfree(vm); } @@ -2512,7 +2537,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, lockdep_assert_held_write(&vm->lock); - err = xe_vm_prepare_vma(exec, vma, 1); + err = xe_vm_lock_vma(exec, vma); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 6df1f1c7f85d..306cd0934a19 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm); int xe_vm_userptr_check_repin(struct xe_vm *vm); -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); int xe_vm_invalidate_vma(struct xe_vma *vma); @@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared); +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); + +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences); /** * xe_vm_resv() - Return's the vm's reservation object diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index ae5fb565f6bf..badf3945083d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -177,9 +177,6 @@ struct xe_vm { */ struct list_head rebind_list; - /** @rebind_fence: rebind fence from execbuf */ - struct dma_fence *rebind_fence; - /** * @destroy_work: worker to destroy VM, needed as a dma_fence signaling * from an irq context can be last put and the destroy needs to be able @@ -264,6 +261,11 @@ struct xe_vm { bool capture_once; } error_capture; + /** + * @tlb_flush_seqno: Required TLB flush seqno for the next exec. + * protected by the vm resv. + */ + u64 tlb_flush_seqno; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 783975d1384f..7c52757a89db 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -351,11 +351,6 @@ static int host1x_device_uevent(const struct device *dev, return 0; } -static int host1x_dma_configure(struct device *dev) -{ - return of_dma_configure(dev, dev->of_node, true); -} - static const struct dev_pm_ops host1x_device_pm_ops = { .suspend = pm_generic_suspend, .resume = pm_generic_resume, @@ -369,7 +364,6 @@ const struct bus_type host1x_bus_type = { .name = "host1x", .match = host1x_device_match, .uevent = host1x_device_uevent, - .dma_configure = host1x_dma_configure, .pm = &host1x_device_pm_ops, }; @@ -458,8 +452,6 @@ static int host1x_device_add(struct host1x *host1x, device->dev.bus = &host1x_bus_type; device->dev.parent = host1x->dev; - of_dma_configure(&device->dev, host1x->dev->of_node, true); - device->dev.dma_parms = &device->dma_parms; dma_set_max_seg_size(&device->dev, UINT_MAX); diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index adbf674355b2..fb8cd8469328 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -153,7 +153,9 @@ void vmbus_free_ring(struct vmbus_channel *channel) hv_ringbuffer_cleanup(&channel->inbound); if (channel->ringbuffer_page) { - __free_pages(channel->ringbuffer_page, + /* In a CoCo VM leak the memory if it didn't get re-encrypted */ + if (!channel->ringbuffer_gpadlhandle.decrypted) + __free_pages(channel->ringbuffer_page, get_order(channel->ringbuffer_pagecount << PAGE_SHIFT)); channel->ringbuffer_page = NULL; @@ -436,9 +438,18 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel, (atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1); ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo); - if (ret) + if (ret) { + gpadl->decrypted = false; return ret; + } + /* + * Set the "decrypted" flag to true for the set_memory_decrypted() + * success case. In the failure case, the encryption state of the + * memory is unknown. Leave "decrypted" as true to ensure the + * memory will be leaked instead of going back on the free list. + */ + gpadl->decrypted = true; ret = set_memory_decrypted((unsigned long)kbuffer, PFN_UP(size)); if (ret) { @@ -527,9 +538,15 @@ cleanup: kfree(msginfo); - if (ret) - set_memory_encrypted((unsigned long)kbuffer, - PFN_UP(size)); + if (ret) { + /* + * If set_memory_encrypted() fails, the decrypted flag is + * left as true so the memory is leaked instead of being + * put back on the free list. + */ + if (!set_memory_encrypted((unsigned long)kbuffer, PFN_UP(size))) + gpadl->decrypted = false; + } return ret; } @@ -850,6 +867,8 @@ post_msg_err: if (ret) pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret); + gpadl->decrypted = ret; + return ret; } EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl); diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 3cabeeabb1ca..f001ae880e1d 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -237,8 +237,17 @@ int vmbus_connect(void) vmbus_connection.monitor_pages[0], 1); ret |= set_memory_decrypted((unsigned long) vmbus_connection.monitor_pages[1], 1); - if (ret) + if (ret) { + /* + * If set_memory_decrypted() fails, the encryption state + * of the memory is unknown. So leak the memory instead + * of risking returning decrypted memory to the free list. + * For simplicity, always handle both pages the same. + */ + vmbus_connection.monitor_pages[0] = NULL; + vmbus_connection.monitor_pages[1] = NULL; goto cleanup; + } /* * Set_memory_decrypted() will change the memory contents if @@ -337,13 +346,19 @@ void vmbus_disconnect(void) vmbus_connection.int_page = NULL; } - set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1); - set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1); + if (vmbus_connection.monitor_pages[0]) { + if (!set_memory_encrypted( + (unsigned long)vmbus_connection.monitor_pages[0], 1)) + hv_free_hyperv_page(vmbus_connection.monitor_pages[0]); + vmbus_connection.monitor_pages[0] = NULL; + } - hv_free_hyperv_page(vmbus_connection.monitor_pages[0]); - hv_free_hyperv_page(vmbus_connection.monitor_pages[1]); - vmbus_connection.monitor_pages[0] = NULL; - vmbus_connection.monitor_pages[1] = NULL; + if (vmbus_connection.monitor_pages[1]) { + if (!set_memory_encrypted( + (unsigned long)vmbus_connection.monitor_pages[1], 1)) + hv_free_hyperv_page(vmbus_connection.monitor_pages[1]); + vmbus_connection.monitor_pages[1] = NULL; + } } /* diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 4cb17603a828..12a707ab73f8 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -131,7 +131,7 @@ static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid); + return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid); } static DEVICE_ATTR_RO(id); @@ -142,7 +142,7 @@ static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", hv_dev->channel->state); + return sysfs_emit(buf, "%d\n", hv_dev->channel->state); } static DEVICE_ATTR_RO(state); @@ -153,7 +153,7 @@ static ssize_t monitor_id_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid); + return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid); } static DEVICE_ATTR_RO(monitor_id); @@ -164,8 +164,8 @@ static ssize_t class_id_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "{%pUl}\n", - &hv_dev->channel->offermsg.offer.if_type); + return sysfs_emit(buf, "{%pUl}\n", + &hv_dev->channel->offermsg.offer.if_type); } static DEVICE_ATTR_RO(class_id); @@ -176,8 +176,8 @@ static ssize_t device_id_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "{%pUl}\n", - &hv_dev->channel->offermsg.offer.if_instance); + return sysfs_emit(buf, "{%pUl}\n", + &hv_dev->channel->offermsg.offer.if_instance); } static DEVICE_ATTR_RO(device_id); @@ -186,7 +186,7 @@ static ssize_t modalias_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); - return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type); + return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type); } static DEVICE_ATTR_RO(modalias); @@ -199,7 +199,7 @@ static ssize_t numa_node_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu)); + return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu)); } static DEVICE_ATTR_RO(numa_node); #endif @@ -212,9 +212,8 @@ static ssize_t server_monitor_pending_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", - channel_pending(hv_dev->channel, - vmbus_connection.monitor_pages[0])); + return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel, + vmbus_connection.monitor_pages[0])); } static DEVICE_ATTR_RO(server_monitor_pending); @@ -226,9 +225,8 @@ static ssize_t client_monitor_pending_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", - channel_pending(hv_dev->channel, - vmbus_connection.monitor_pages[1])); + return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel, + vmbus_connection.monitor_pages[1])); } static DEVICE_ATTR_RO(client_monitor_pending); @@ -240,9 +238,8 @@ static ssize_t server_monitor_latency_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", - channel_latency(hv_dev->channel, - vmbus_connection.monitor_pages[0])); + return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel, + vmbus_connection.monitor_pages[0])); } static DEVICE_ATTR_RO(server_monitor_latency); @@ -254,9 +251,8 @@ static ssize_t client_monitor_latency_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", - channel_latency(hv_dev->channel, - vmbus_connection.monitor_pages[1])); + return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel, + vmbus_connection.monitor_pages[1])); } static DEVICE_ATTR_RO(client_monitor_latency); @@ -268,9 +264,8 @@ static ssize_t server_monitor_conn_id_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", - channel_conn_id(hv_dev->channel, - vmbus_connection.monitor_pages[0])); + return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel, + vmbus_connection.monitor_pages[0])); } static DEVICE_ATTR_RO(server_monitor_conn_id); @@ -282,9 +277,8 @@ static ssize_t client_monitor_conn_id_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; - return sprintf(buf, "%d\n", - channel_conn_id(hv_dev->channel, - vmbus_connection.monitor_pages[1])); + return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel, + vmbus_connection.monitor_pages[1])); } static DEVICE_ATTR_RO(client_monitor_conn_id); @@ -303,7 +297,7 @@ static ssize_t out_intr_mask_show(struct device *dev, if (ret < 0) return ret; - return sprintf(buf, "%d\n", outbound.current_interrupt_mask); + return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask); } static DEVICE_ATTR_RO(out_intr_mask); @@ -321,7 +315,7 @@ static ssize_t out_read_index_show(struct device *dev, &outbound); if (ret < 0) return ret; - return sprintf(buf, "%d\n", outbound.current_read_index); + return sysfs_emit(buf, "%d\n", outbound.current_read_index); } static DEVICE_ATTR_RO(out_read_index); @@ -340,7 +334,7 @@ static ssize_t out_write_index_show(struct device *dev, &outbound); if (ret < 0) return ret; - return sprintf(buf, "%d\n", outbound.current_write_index); + return sysfs_emit(buf, "%d\n", outbound.current_write_index); } static DEVICE_ATTR_RO(out_write_index); @@ -359,7 +353,7 @@ static ssize_t out_read_bytes_avail_show(struct device *dev, &outbound); if (ret < 0) return ret; - return sprintf(buf, "%d\n", outbound.bytes_avail_toread); + return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread); } static DEVICE_ATTR_RO(out_read_bytes_avail); @@ -378,7 +372,7 @@ static ssize_t out_write_bytes_avail_show(struct device *dev, &outbound); if (ret < 0) return ret; - return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); + return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(out_write_bytes_avail); @@ -396,7 +390,7 @@ static ssize_t in_intr_mask_show(struct device *dev, if (ret < 0) return ret; - return sprintf(buf, "%d\n", inbound.current_interrupt_mask); + return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask); } static DEVICE_ATTR_RO(in_intr_mask); @@ -414,7 +408,7 @@ static ssize_t in_read_index_show(struct device *dev, if (ret < 0) return ret; - return sprintf(buf, "%d\n", inbound.current_read_index); + return sysfs_emit(buf, "%d\n", inbound.current_read_index); } static DEVICE_ATTR_RO(in_read_index); @@ -432,7 +426,7 @@ static ssize_t in_write_index_show(struct device *dev, if (ret < 0) return ret; - return sprintf(buf, "%d\n", inbound.current_write_index); + return sysfs_emit(buf, "%d\n", inbound.current_write_index); } static DEVICE_ATTR_RO(in_write_index); @@ -451,7 +445,7 @@ static ssize_t in_read_bytes_avail_show(struct device *dev, if (ret < 0) return ret; - return sprintf(buf, "%d\n", inbound.bytes_avail_toread); + return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread); } static DEVICE_ATTR_RO(in_read_bytes_avail); @@ -470,7 +464,7 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, if (ret < 0) return ret; - return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); + return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(in_write_bytes_avail); @@ -480,7 +474,7 @@ static ssize_t channel_vp_mapping_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct vmbus_channel *channel = hv_dev->channel, *cur_sc; - int buf_size = PAGE_SIZE, n_written, tot_written; + int n_written; struct list_head *cur; if (!channel) @@ -488,25 +482,21 @@ static ssize_t channel_vp_mapping_show(struct device *dev, mutex_lock(&vmbus_connection.channel_mutex); - tot_written = snprintf(buf, buf_size, "%u:%u\n", - channel->offermsg.child_relid, channel->target_cpu); + n_written = sysfs_emit(buf, "%u:%u\n", + channel->offermsg.child_relid, + channel->target_cpu); list_for_each(cur, &channel->sc_list) { - if (tot_written >= buf_size - 1) - break; cur_sc = list_entry(cur, struct vmbus_channel, sc_list); - n_written = scnprintf(buf + tot_written, - buf_size - tot_written, - "%u:%u\n", - cur_sc->offermsg.child_relid, - cur_sc->target_cpu); - tot_written += n_written; + n_written += sysfs_emit_at(buf, n_written, "%u:%u\n", + cur_sc->offermsg.child_relid, + cur_sc->target_cpu); } mutex_unlock(&vmbus_connection.channel_mutex); - return tot_written; + return n_written; } static DEVICE_ATTR_RO(channel_vp_mapping); @@ -516,7 +506,7 @@ static ssize_t vendor_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); - return sprintf(buf, "0x%x\n", hv_dev->vendor_id); + return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id); } static DEVICE_ATTR_RO(vendor); @@ -526,7 +516,7 @@ static ssize_t device_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); - return sprintf(buf, "0x%x\n", hv_dev->device_id); + return sysfs_emit(buf, "0x%x\n", hv_dev->device_id); } static DEVICE_ATTR_RO(device); @@ -551,7 +541,7 @@ static ssize_t driver_override_show(struct device *dev, ssize_t len; device_lock(dev); - len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override); + len = sysfs_emit(buf, "%s\n", hv_dev->driver_override); device_unlock(dev); return len; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index a6861660cb8c..79870dd7a014 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -536,11 +536,12 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, if (read_write == I2C_SMBUS_READ || command == I2C_SMBUS_BLOCK_PROC_CALL) { - status = i801_get_block_len(priv); - if (status < 0) + len = i801_get_block_len(priv); + if (len < 0) { + status = len; goto out; + } - len = status; data->block[0] = len; inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 76f79b68cef8..888ca636f3f3 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -324,6 +324,7 @@ static void decode_ISR(unsigned int val) decode_bits(KERN_DEBUG "ISR", isr_bits, ARRAY_SIZE(isr_bits), val); } +#ifdef CONFIG_I2C_PXA_SLAVE static const struct bits icr_bits[] = { PXA_BIT(ICR_START, "START", NULL), PXA_BIT(ICR_STOP, "STOP", NULL), @@ -342,7 +343,6 @@ static const struct bits icr_bits[] = { PXA_BIT(ICR_UR, "UR", "ur"), }; -#ifdef CONFIG_I2C_PXA_SLAVE static void decode_ICR(unsigned int val) { decode_bits(KERN_DEBUG "ICR", icr_bits, ARRAY_SIZE(icr_bits), val); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index bf0df6ee4f78..07fb8d3c037f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1026,23 +1026,26 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) } } -static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id) +static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id, + enum ib_cm_state old_state) { struct cm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__, - cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount)); + pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__, + cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount)); } static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; + enum ib_cm_state old_state; struct cm_work *work; int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irq(&cm_id_priv->lock); + old_state = cm_id->state; retest: switch (cm_id->state) { case IB_CM_LISTEN: @@ -1151,7 +1154,7 @@ retest: msecs_to_jiffies( CM_DESTROY_ID_WAIT_TIMEOUT)); if (!ret) /* timeout happened */ - cm_destroy_id_wait_timeout(cm_id); + cm_destroy_id_wait_timeout(cm_id, old_state); } while (!ret); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 0c3c4e64812c..3e43687a7f6f 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -188,7 +188,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, mdev = dev->mdev; mdev_port_num = 1; } - if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) { + if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 && + !mlx5_core_mp_enabled(mdev)) { /* set local port to one for Function-Per-Port HCA. */ mdev = dev->mdev; mdev_port_num = 1; diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index ae466e72fc43..255677bc12b2 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -33,6 +33,8 @@ void rxe_dealloc(struct ib_device *ib_dev) if (rxe->tfm) crypto_free_shash(rxe->tfm); + + mutex_destroy(&rxe->usdev_lock); } /* initialize rxe device parameters */ diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 5d1010cafed8..7e9b996b47c8 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -176,6 +176,8 @@ static struct icc_path *path_init(struct device *dev, struct icc_node *dst, path->num_nodes = num_nodes; + mutex_lock(&icc_bw_lock); + for (i = num_nodes - 1; i >= 0; i--) { node->provider->users++; hlist_add_head(&path->reqs[i].req_node, &node->req_list); @@ -186,6 +188,8 @@ static struct icc_path *path_init(struct device *dev, struct icc_node *dst, node = node->reverse; } + mutex_unlock(&icc_bw_lock); + return path; } @@ -792,12 +796,16 @@ void icc_put(struct icc_path *path) pr_err("%s: error (%d)\n", __func__, ret); mutex_lock(&icc_lock); + mutex_lock(&icc_bw_lock); + for (i = 0; i < path->num_nodes; i++) { node = path->reqs[i].node; hlist_del(&path->reqs[i].req_node); if (!WARN_ON(!node->provider->users)) node->provider->users--; } + + mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); kfree_const(path->name); diff --git a/drivers/interconnect/qcom/x1e80100.c b/drivers/interconnect/qcom/x1e80100.c index 99824675ee3f..654abb9ce08e 100644 --- a/drivers/interconnect/qcom/x1e80100.c +++ b/drivers/interconnect/qcom/x1e80100.c @@ -116,15 +116,6 @@ static struct qcom_icc_node xm_sdc2 = { .links = { X1E80100_SLAVE_A2NOC_SNOC }, }; -static struct qcom_icc_node ddr_perf_mode_master = { - .name = "ddr_perf_mode_master", - .id = X1E80100_MASTER_DDR_PERF_MODE, - .channels = 1, - .buswidth = 4, - .num_links = 1, - .links = { X1E80100_SLAVE_DDR_PERF_MODE }, -}; - static struct qcom_icc_node qup0_core_master = { .name = "qup0_core_master", .id = X1E80100_MASTER_QUP_CORE_0, @@ -688,14 +679,6 @@ static struct qcom_icc_node qns_a2noc_snoc = { .links = { X1E80100_MASTER_A2NOC_SNOC }, }; -static struct qcom_icc_node ddr_perf_mode_slave = { - .name = "ddr_perf_mode_slave", - .id = X1E80100_SLAVE_DDR_PERF_MODE, - .channels = 1, - .buswidth = 4, - .num_links = 0, -}; - static struct qcom_icc_node qup0_core_slave = { .name = "qup0_core_slave", .id = X1E80100_SLAVE_QUP_CORE_0, @@ -1377,12 +1360,6 @@ static struct qcom_icc_bcm bcm_acv = { .nodes = { &ebi }, }; -static struct qcom_icc_bcm bcm_acv_perf = { - .name = "ACV_PERF", - .num_nodes = 1, - .nodes = { &ddr_perf_mode_slave }, -}; - static struct qcom_icc_bcm bcm_ce0 = { .name = "CE0", .num_nodes = 1, @@ -1583,18 +1560,15 @@ static const struct qcom_icc_desc x1e80100_aggre2_noc = { }; static struct qcom_icc_bcm * const clk_virt_bcms[] = { - &bcm_acv_perf, &bcm_qup0, &bcm_qup1, &bcm_qup2, }; static struct qcom_icc_node * const clk_virt_nodes[] = { - [MASTER_DDR_PERF_MODE] = &ddr_perf_mode_master, [MASTER_QUP_CORE_0] = &qup0_core_master, [MASTER_QUP_CORE_1] = &qup1_core_master, [MASTER_QUP_CORE_2] = &qup2_core_master, - [SLAVE_DDR_PERF_MODE] = &ddr_perf_mode_slave, [SLAVE_QUP_CORE_0] = &qup0_core_slave, [SLAVE_QUP_CORE_1] = &qup1_core_slave, [SLAVE_QUP_CORE_2] = &qup2_core_slave, diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index e7a44929f0da..ac6754a85f35 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3228,30 +3228,33 @@ out: static void iommu_snp_enable(void) { #ifdef CONFIG_KVM_AMD_SEV - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; /* * The SNP support requires that IOMMU must be enabled, and is - * not configured in the passthrough mode. + * configured with V1 page table (DTE[Mode] = 0 is not supported). */ if (no_iommu || iommu_default_passthrough()) { - pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); - return; + pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); + goto disable_snp; + } + + if (amd_iommu_pgtable != AMD_IOMMU_V1) { + pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); + goto disable_snp; } amd_iommu_snp_en = check_feature(FEATURE_SNP); if (!amd_iommu_snp_en) { - pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); - return; + pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); + goto disable_snp; } pr_info("IOMMU SNP support enabled.\n"); + return; - /* Enforce IOMMU v1 pagetable when SNP is enabled. */ - if (amd_iommu_pgtable != AMD_IOMMU_V1) { - pr_warn("Forcing use of AMD IOMMU v1 page table due to SNP.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; - } +disable_snp: + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); #endif } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d35c1b8c8e65..e692217fcb28 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1692,26 +1692,29 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, static u16 domain_id_alloc(void) { + unsigned long flags; int id; - spin_lock(&pd_bitmap_lock); + spin_lock_irqsave(&pd_bitmap_lock, flags); id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); BUG_ON(id == 0); if (id > 0 && id < MAX_DOMAIN_ID) __set_bit(id, amd_iommu_pd_alloc_bitmap); else id = 0; - spin_unlock(&pd_bitmap_lock); + spin_unlock_irqrestore(&pd_bitmap_lock, flags); return id; } static void domain_id_free(int id) { - spin_lock(&pd_bitmap_lock); + unsigned long flags; + + spin_lock_irqsave(&pd_bitmap_lock, flags); if (id > 0 && id < MAX_DOMAIN_ID) __clear_bit(id, amd_iommu_pd_alloc_bitmap); - spin_unlock(&pd_bitmap_lock); + spin_unlock_irqrestore(&pd_bitmap_lock, flags); } static void free_gcr3_tbl_level1(u64 *tbl) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5ed036225e69..41f93c3ab160 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1139,7 +1139,8 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, * requires a breaking update, zero the V bit, write all qwords * but 0, then set qword 0 */ - unused_update.data[0] = entry->data[0] & (~STRTAB_STE_0_V); + unused_update.data[0] = entry->data[0] & + cpu_to_le64(~STRTAB_STE_0_V); entry_set(smmu, sid, entry, &unused_update, 0, 1); entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1); entry_set(smmu, sid, entry, target, 0, 1); @@ -1453,14 +1454,17 @@ static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT)); } -static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target) +static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, + struct arm_smmu_ste *target) { memset(target, 0, sizeof(*target)); target->data[0] = cpu_to_le64( STRTAB_STE_0_V | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS)); - target->data[1] = cpu_to_le64( - FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); + + if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) + target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + STRTAB_STE_1_SHCFG_INCOMING)); } static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, @@ -1523,6 +1527,7 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; u64 vtcr_val; + struct arm_smmu_device *smmu = master->smmu; memset(target, 0, sizeof(*target)); target->data[0] = cpu_to_le64( @@ -1531,9 +1536,11 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, target->data[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_EATS, - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) | - FIELD_PREP(STRTAB_STE_1_SHCFG, - STRTAB_STE_1_SHCFG_INCOMING)); + master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + + if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) + target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + STRTAB_STE_1_SHCFG_INCOMING)); vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | @@ -1560,7 +1567,8 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, * This can safely directly manipulate the STE memory without a sync sequence * because the STE table has not been installed in the SMMU yet. */ -static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, +static void arm_smmu_init_initial_stes(struct arm_smmu_device *smmu, + struct arm_smmu_ste *strtab, unsigned int nent) { unsigned int i; @@ -1569,7 +1577,7 @@ static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, if (disable_bypass) arm_smmu_make_abort_ste(strtab); else - arm_smmu_make_bypass_ste(strtab); + arm_smmu_make_bypass_ste(smmu, strtab); strtab++; } } @@ -1597,7 +1605,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return -ENOMEM; } - arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT); + arm_smmu_init_initial_stes(smmu, desc->l2ptr, 1 << STRTAB_SPLIT); arm_smmu_write_strtab_l1_desc(strtab, desc); return 0; } @@ -2637,8 +2645,9 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct device *dev) { struct arm_smmu_ste ste; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); - arm_smmu_make_bypass_ste(&ste); + arm_smmu_make_bypass_ste(master->smmu, &ste); return arm_smmu_attach_dev_ste(dev, &ste); } @@ -3264,7 +3273,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); cfg->strtab_base_cfg = reg; - arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents); + arm_smmu_init_initial_stes(smmu, strtab, cfg->num_l1_ents); return 0; } @@ -3777,6 +3786,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) return -ENXIO; } + if (reg & IDR1_ATTR_TYPES_OVR) + smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR; + /* Queue sizes, capped to ensure natural alignment */ smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, reg)); @@ -3992,7 +4004,7 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) * STE table is not programmed to HW, see * arm_smmu_initial_bypass_stes() */ - arm_smmu_make_bypass_ste( + arm_smmu_make_bypass_ste(smmu, arm_smmu_get_step_for_sid(smmu, rmr->sids[i])); } } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 23baf117e7e4..2a19bb63e5c6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -44,6 +44,7 @@ #define IDR1_TABLES_PRESET (1 << 30) #define IDR1_QUEUES_PRESET (1 << 29) #define IDR1_REL (1 << 28) +#define IDR1_ATTR_TYPES_OVR (1 << 27) #define IDR1_CMDQS GENMASK(25, 21) #define IDR1_EVTQS GENMASK(20, 16) #define IDR1_PRIQS GENMASK(15, 11) @@ -647,6 +648,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_SVA (1 << 17) #define ARM_SMMU_FEAT_E2H (1 << 18) #define ARM_SMMU_FEAT_NESTING (1 << 19) +#define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 50eb9aed47cc..a7ecd90303dc 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4299,9 +4299,11 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } dev_iommu_priv_set(dev, info); - ret = device_rbtree_insert(iommu, info); - if (ret) - goto free; + if (pdev && pci_ats_supported(pdev)) { + ret = device_rbtree_insert(iommu, info); + if (ret) + goto free; + } if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { ret = intel_pasid_alloc_table(dev); @@ -4336,7 +4338,8 @@ static void intel_iommu_release_device(struct device *dev) struct intel_iommu *iommu = info->iommu; mutex_lock(&iommu->iopf_lock); - device_rbtree_remove(info); + if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) + device_rbtree_remove(info); mutex_unlock(&iommu->iopf_lock); if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index cf43e798eca4..44083d01852d 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu, iommu_pmu_set_filter(domain, event->attr.config1, IOMMU_PMU_FILTER_DOMAIN, idx, event->attr.config1); - iommu_pmu_set_filter(pasid, event->attr.config1, + iommu_pmu_set_filter(pasid, event->attr.config2, IOMMU_PMU_FILTER_PASID, idx, event->attr.config1); iommu_pmu_set_filter(ats, event->attr.config2, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index c1bed89b1026..ee3b469e2da1 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -66,7 +66,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) struct page *pages; int irq, ret; - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 098869007c69..a95a483def2d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3354,6 +3354,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; + struct group_device *device; void *curr; int ret; @@ -3363,10 +3364,18 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, if (!group) return -ENODEV; - if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner) + if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner || + pasid == IOMMU_NO_PASID) return -EINVAL; mutex_lock(&group->mutex); + for_each_group_device(group, device) { + if (pasid >= device->dev->iommu->max_pasids) { + ret = -EINVAL; + goto out_unlock; + } + } + curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); if (curr) { ret = xa_err(curr) ? : -EBUSY; diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 99d4b075df49..76656fe0470d 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -37,6 +37,7 @@ config IOMMUFD_TEST depends on DEBUG_KERNEL depends on FAULT_INJECTION depends on RUNTIME_TESTING_MENU + select IOMMUFD_DRIVER default n help This is dangerous, do not enable unless running diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b8c47f18bc26..6a2707fe7a78 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1790,6 +1790,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data}, {} }; +MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids); static struct platform_driver mtk_iommu_driver = { .probe = mtk_iommu_probe, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index a9fa2a54dc9b..d6e4002200bd 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -600,6 +600,7 @@ static const struct of_device_id mtk_iommu_v1_of_ids[] = { { .compatible = "mediatek,mt2701-m4u", }, {} }; +MODULE_DEVICE_TABLE(of, mtk_iommu_v1_of_ids); static const struct component_master_ops mtk_iommu_v1_com_ops = { .bind = mtk_iommu_v1_bind, diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index a55528469278..4b021a67bdfe 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -316,7 +316,7 @@ static int armada_370_xp_msi_init(struct device_node *node, return 0; } #else -static void armada_370_xp_msi_reenable_percpu(void) {} +static __maybe_unused void armada_370_xp_msi_reenable_percpu(void) {} static inline int armada_370_xp_msi_init(struct device_node *node, phys_addr_t main_int_phys_base) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fca888b36680..2a537cbfcb07 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -786,6 +786,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its, struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct its_vpe *vpe = valid_vpe(its, desc->its_vmapp_cmd.vpe); unsigned long vpt_addr, vconf_addr; u64 target; bool alloc; @@ -798,6 +799,11 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its, if (is_v4_1(its)) { alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count); its_encode_alloc(cmd, alloc); + /* + * Unmapping a VPE is self-synchronizing on GICv4.1, + * no need to issue a VSYNC. + */ + vpe = NULL; } goto out; @@ -832,7 +838,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its, out: its_fixup_cmd(cmd); - return valid_vpe(its, desc->its_vmapp_cmd.vpe); + return vpe; } static struct its_vpe *its_build_vmapti_cmd(struct its_node *its, diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 2776ca5fc33f..b215b28cad7b 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -401,23 +401,23 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } static int data_sock_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int len) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; int err = 0, opt = 0; if (*debug & DEBUG_SOCKET) printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock, - level, optname, len); + level, optname, optlen); lock_sock(sk); switch (optname) { case MISDN_TIME_STAMP: - if (copy_from_sockptr(&opt, optval, sizeof(int))) { - err = -EFAULT; + err = copy_safe_from_sockptr(&opt, sizeof(opt), + optval, optlen); + if (err) break; - } if (opt) _pms(sk)->cmask |= MISDN_TIME_STAMP; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 37b9f8f1ae1a..7f3dc8ee6ab8 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4221,7 +4221,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { - if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { r = -EINVAL; ti->error = "Invalid bitmap_flush_interval argument"; goto bad; diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c index 00c9b9c05001..01d2743444ec 100644 --- a/drivers/md/dm-vdo/murmurhash3.c +++ b/drivers/md/dm-vdo/murmurhash3.c @@ -8,33 +8,14 @@ #include "murmurhash3.h" +#include + static inline u64 rotl64(u64 x, s8 r) { return (x << r) | (x >> (64 - r)); } #define ROTL64(x, y) rotl64(x, y) -static __always_inline u64 getblock64(const u64 *p, int i) -{ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return p[i]; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return __builtin_bswap64(p[i]); -#else -#error "can't figure out byte order" -#endif -} - -static __always_inline void putblock64(u64 *p, int i, u64 value) -{ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - p[i] = value; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - p[i] = __builtin_bswap64(value); -#else -#error "can't figure out byte order" -#endif -} /* Finalization mix - force all bits of a hash block to avalanche */ @@ -60,6 +41,8 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out) const u64 c1 = 0x87c37b91114253d5LLU; const u64 c2 = 0x4cf5ad432745937fLLU; + u64 *hash_out = out; + /* body */ const u64 *blocks = (const u64 *)(data); @@ -67,8 +50,8 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out) int i; for (i = 0; i < nblocks; i++) { - u64 k1 = getblock64(blocks, i * 2 + 0); - u64 k2 = getblock64(blocks, i * 2 + 1); + u64 k1 = get_unaligned_le64(&blocks[i * 2]); + u64 k2 = get_unaligned_le64(&blocks[i * 2 + 1]); k1 *= c1; k1 = ROTL64(k1, 31); @@ -170,6 +153,6 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out) h1 += h2; h2 += h1; - putblock64((u64 *)out, 0, h1); - putblock64((u64 *)out, 1, h2); + put_unaligned_le64(h1, &hash_out[0]); + put_unaligned_le64(h2, &hash_out[1]); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index be8ac24f50b6..7b8a71ca66dd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1558,7 +1558,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, for (j = 0; j < i; j++) if (r1_bio->bios[j]) rdev_dec_pending(conf->mirrors[j].rdev, mddev); - free_r1bio(r1_bio); + mempool_free(r1_bio, &conf->r1bio_pool); allow_barrier(conf, bio->bi_iter.bi_sector); if (bio->bi_opf & REQ_NOWAIT) { diff --git a/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c index 4c34344dc7dc..d7027d600208 100644 --- a/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c +++ b/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c @@ -50,12 +50,12 @@ static void mtk_vcodec_vpu_reset_dec_handler(void *priv) dev_err(&dev->plat_dev->dev, "Watchdog timeout!!"); - mutex_lock(&dev->dev_mutex); + mutex_lock(&dev->dev_ctx_lock); list_for_each_entry(ctx, &dev->ctx_list, list) { ctx->state = MTK_STATE_ABORT; mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id); } - mutex_unlock(&dev->dev_mutex); + mutex_unlock(&dev->dev_ctx_lock); } static void mtk_vcodec_vpu_reset_enc_handler(void *priv) @@ -65,12 +65,12 @@ static void mtk_vcodec_vpu_reset_enc_handler(void *priv) dev_err(&dev->plat_dev->dev, "Watchdog timeout!!"); - mutex_lock(&dev->dev_mutex); + mutex_lock(&dev->dev_ctx_lock); list_for_each_entry(ctx, &dev->ctx_list, list) { ctx->state = MTK_STATE_ABORT; mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id); } - mutex_unlock(&dev->dev_mutex); + mutex_unlock(&dev->dev_ctx_lock); } static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = { diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c index f47c98faf068..2073781ccadb 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c @@ -268,7 +268,9 @@ static int fops_vcodec_open(struct file *file) ctx->dev->vdec_pdata->init_vdec_params(ctx); + mutex_lock(&dev->dev_ctx_lock); list_add(&ctx->list, &dev->ctx_list); + mutex_unlock(&dev->dev_ctx_lock); mtk_vcodec_dbgfs_create(ctx); mutex_unlock(&dev->dev_mutex); @@ -311,7 +313,9 @@ static int fops_vcodec_release(struct file *file) v4l2_ctrl_handler_free(&ctx->ctrl_hdl); mtk_vcodec_dbgfs_remove(dev, ctx->id); + mutex_lock(&dev->dev_ctx_lock); list_del_init(&ctx->list); + mutex_unlock(&dev->dev_ctx_lock); kfree(ctx); mutex_unlock(&dev->dev_mutex); return 0; @@ -404,6 +408,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev) for (i = 0; i < MTK_VDEC_HW_MAX; i++) mutex_init(&dev->dec_mutex[i]); mutex_init(&dev->dev_mutex); + mutex_init(&dev->dev_ctx_lock); spin_lock_init(&dev->irqlock); snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s", diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h index 849b89dd205c..85b2c0d3d8bc 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h @@ -241,6 +241,7 @@ struct mtk_vcodec_dec_ctx { * * @dec_mutex: decoder hardware lock * @dev_mutex: video_device lock + * @dev_ctx_lock: the lock of context list * @decode_workqueue: decode work queue * * @irqlock: protect data access by irq handler and work thread @@ -282,6 +283,7 @@ struct mtk_vcodec_dec_dev { /* decoder hardware mutex lock */ struct mutex dec_mutex[MTK_VDEC_HW_MAX]; struct mutex dev_mutex; + struct mutex dev_ctx_lock; struct workqueue_struct *decode_workqueue; spinlock_t irqlock; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c index 06ed47df693b..21836dd6ef85 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c @@ -869,7 +869,6 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx) inst->vpu.codec_type = ctx->current_codec; inst->vpu.capture_type = ctx->capture_fourcc; - ctx->drv_handle = inst; err = vpu_dec_init(&inst->vpu); if (err) { mtk_vdec_err(ctx, "vdec_hevc init err=%d", err); @@ -898,6 +897,7 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx) mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x", inst, inst->vpu.codec_type); + ctx->drv_handle = inst; return 0; error_free_inst: kfree(inst); diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c index 19407f9bc773..987b3d71b662 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c @@ -449,7 +449,7 @@ static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs, inst->frm_cnt, y_fb_dma, c_fb_dma, fb); inst->cur_fb = fb; - dec->bs_dma = (unsigned long)bs->dma_addr; + dec->bs_dma = (uint64_t)bs->dma_addr; dec->bs_sz = bs->size; dec->cur_y_fb_dma = y_fb_dma; dec->cur_c_fb_dma = c_fb_dma; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c index 55355fa70090..039082f600c8 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c @@ -16,6 +16,7 @@ #include "../vdec_drv_base.h" #include "../vdec_vpu_if.h" +#define VP9_MAX_SUPER_FRAMES_NUM 8 #define VP9_SUPER_FRAME_BS_SZ 64 #define MAX_VP9_DPB_SIZE 9 @@ -133,11 +134,11 @@ struct vp9_sf_ref_fb { */ struct vdec_vp9_vsi { unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ]; - struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1]; + struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_SUPER_FRAMES_NUM]; int sf_next_ref_fb_idx; unsigned int sf_frm_cnt; - unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1]; - unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1]; + unsigned int sf_frm_offset[VP9_MAX_SUPER_FRAMES_NUM]; + unsigned int sf_frm_sz[VP9_MAX_SUPER_FRAMES_NUM]; unsigned int sf_frm_idx; unsigned int sf_init; struct vdec_fb fb; @@ -526,7 +527,7 @@ static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst) /* if this super frame and it is not last sub-frame, get next fb for * sub-frame decode */ - if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1) + if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt) vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst); } @@ -735,7 +736,7 @@ static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb) static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst, struct vdec_vp9_vsi *vsi) { - if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) { + if (vsi->sf_frm_idx > VP9_MAX_SUPER_FRAMES_NUM) { mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx); return -EIO; } diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c index cf48d09b78d7..eea709d93820 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c @@ -1074,7 +1074,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst unsigned int mi_row; unsigned int mi_col; unsigned int offset; - unsigned int pa; + dma_addr_t pa; unsigned int size; struct vdec_vp9_slice_tiles *tiles; unsigned char *pos; @@ -1109,7 +1109,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst pos = va + offset; end = va + bs->size; /* truncated */ - pa = (unsigned int)bs->dma_addr + offset; + pa = bs->dma_addr + offset; tb = instance->tile.va; for (i = 0; i < rows; i++) { for (j = 0; j < cols; j++) { diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c index 82e57ae983d5..da6be556727b 100644 --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c @@ -77,12 +77,14 @@ static bool vpu_dec_check_ap_inst(struct mtk_vcodec_dec_dev *dec_dev, struct vde struct mtk_vcodec_dec_ctx *ctx; int ret = false; + mutex_lock(&dec_dev->dev_ctx_lock); list_for_each_entry(ctx, &dec_dev->ctx_list, list) { if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) { ret = true; break; } } + mutex_unlock(&dec_dev->dev_ctx_lock); return ret; } diff --git a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c index 6319f24bc714..3cb8a1622222 100644 --- a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c @@ -177,7 +177,9 @@ static int fops_vcodec_open(struct file *file) mtk_v4l2_venc_dbg(2, ctx, "Create instance [%d]@%p m2m_ctx=%p ", ctx->id, ctx, ctx->m2m_ctx); + mutex_lock(&dev->dev_ctx_lock); list_add(&ctx->list, &dev->ctx_list); + mutex_unlock(&dev->dev_ctx_lock); mutex_unlock(&dev->dev_mutex); mtk_v4l2_venc_dbg(0, ctx, "%s encoder [%d]", dev_name(&dev->plat_dev->dev), @@ -212,7 +214,9 @@ static int fops_vcodec_release(struct file *file) v4l2_fh_exit(&ctx->fh); v4l2_ctrl_handler_free(&ctx->ctrl_hdl); + mutex_lock(&dev->dev_ctx_lock); list_del_init(&ctx->list); + mutex_unlock(&dev->dev_ctx_lock); kfree(ctx); mutex_unlock(&dev->dev_mutex); return 0; @@ -294,6 +298,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev) mutex_init(&dev->enc_mutex); mutex_init(&dev->dev_mutex); + mutex_init(&dev->dev_ctx_lock); spin_lock_init(&dev->irqlock); snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s", diff --git a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h index a042f607ed8d..0bd85d0fb379 100644 --- a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h +++ b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h @@ -178,6 +178,7 @@ struct mtk_vcodec_enc_ctx { * * @enc_mutex: encoder hardware lock. * @dev_mutex: video_device lock + * @dev_ctx_lock: the lock of context list * @encode_workqueue: encode work queue * * @enc_irq: h264 encoder irq resource @@ -205,6 +206,7 @@ struct mtk_vcodec_enc_dev { /* encoder hardware mutex lock */ struct mutex enc_mutex; struct mutex dev_mutex; + struct mutex dev_ctx_lock; struct workqueue_struct *encode_workqueue; int enc_irq; diff --git a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c index 84ad1cc6ad17..51bb7ee141b9 100644 --- a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c +++ b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c @@ -47,12 +47,14 @@ static bool vpu_enc_check_ap_inst(struct mtk_vcodec_enc_dev *enc_dev, struct ven struct mtk_vcodec_enc_ctx *ctx; int ret = false; + mutex_lock(&enc_dev->dev_ctx_lock); list_for_each_entry(ctx, &enc_dev->ctx_list, list) { if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) { ret = true; break; } } + mutex_unlock(&enc_dev->dev_ctx_lock); return ret; } diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index 1a64364700eb..0ad2ff9065aa 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -1002,7 +1002,7 @@ static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) } else { pcr->card_removed |= SD_EXIST; pcr->card_inserted &= ~SD_EXIST; - if (PCI_PID(pcr) == PID_5261) { + if ((PCI_PID(pcr) == PID_5261) || (PCI_PID(pcr) == PID_5264)) { rtsx_pci_write_register(pcr, RTS5261_FW_STATUS, RTS5261_EXPRESS_LINK_FAIL_MASK, 0); pcr->extra_caps |= EXTRA_CAPS_SD_EXPRESS; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index b5757993c9b2..c39718042e2e 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -116,7 +116,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, diff --git a/drivers/misc/mei/platform-vsc.c b/drivers/misc/mei/platform-vsc.c index 6c9f00bcb94b..b543e6b9f3cf 100644 --- a/drivers/misc/mei/platform-vsc.c +++ b/drivers/misc/mei/platform-vsc.c @@ -400,25 +400,40 @@ static void mei_vsc_remove(struct platform_device *pdev) static int mei_vsc_suspend(struct device *dev) { struct mei_device *mei_dev = dev_get_drvdata(dev); + struct mei_vsc_hw *hw = mei_dev_to_vsc_hw(mei_dev); mei_stop(mei_dev); + mei_disable_interrupts(mei_dev); + + vsc_tp_free_irq(hw->tp); + return 0; } static int mei_vsc_resume(struct device *dev) { struct mei_device *mei_dev = dev_get_drvdata(dev); + struct mei_vsc_hw *hw = mei_dev_to_vsc_hw(mei_dev); int ret; + ret = vsc_tp_request_irq(hw->tp); + if (ret) + return ret; + ret = mei_restart(mei_dev); if (ret) - return ret; + goto err_free; /* start timer if stopped in suspend */ schedule_delayed_work(&mei_dev->timer_work, HZ); return 0; + +err_free: + vsc_tp_free_irq(hw->tp); + + return ret; } static DEFINE_SIMPLE_DEV_PM_OPS(mei_vsc_pm_ops, mei_vsc_suspend, mei_vsc_resume); diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index ecfb70cd057c..e6a98dba8a73 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -94,6 +94,27 @@ static const struct acpi_gpio_mapping vsc_tp_acpi_gpios[] = { {} }; +static irqreturn_t vsc_tp_isr(int irq, void *data) +{ + struct vsc_tp *tp = data; + + atomic_inc(&tp->assert_cnt); + + wake_up(&tp->xfer_wait); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t vsc_tp_thread_isr(int irq, void *data) +{ + struct vsc_tp *tp = data; + + if (tp->event_notify) + tp->event_notify(tp->event_notify_context); + + return IRQ_HANDLED; +} + /* wakeup firmware and wait for response */ static int vsc_tp_wakeup_request(struct vsc_tp *tp) { @@ -383,6 +404,37 @@ int vsc_tp_register_event_cb(struct vsc_tp *tp, vsc_tp_event_cb_t event_cb, } EXPORT_SYMBOL_NS_GPL(vsc_tp_register_event_cb, VSC_TP); +/** + * vsc_tp_request_irq - request irq for vsc_tp device + * @tp: vsc_tp device handle + */ +int vsc_tp_request_irq(struct vsc_tp *tp) +{ + struct spi_device *spi = tp->spi; + struct device *dev = &spi->dev; + int ret; + + irq_set_status_flags(spi->irq, IRQ_DISABLE_UNLAZY); + ret = request_threaded_irq(spi->irq, vsc_tp_isr, vsc_tp_thread_isr, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + dev_name(dev), tp); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(vsc_tp_request_irq, VSC_TP); + +/** + * vsc_tp_free_irq - free irq for vsc_tp device + * @tp: vsc_tp device handle + */ +void vsc_tp_free_irq(struct vsc_tp *tp) +{ + free_irq(tp->spi->irq, tp); +} +EXPORT_SYMBOL_NS_GPL(vsc_tp_free_irq, VSC_TP); + /** * vsc_tp_intr_synchronize - synchronize vsc_tp interrupt * @tp: vsc_tp device handle @@ -413,27 +465,6 @@ void vsc_tp_intr_disable(struct vsc_tp *tp) } EXPORT_SYMBOL_NS_GPL(vsc_tp_intr_disable, VSC_TP); -static irqreturn_t vsc_tp_isr(int irq, void *data) -{ - struct vsc_tp *tp = data; - - atomic_inc(&tp->assert_cnt); - - return IRQ_WAKE_THREAD; -} - -static irqreturn_t vsc_tp_thread_isr(int irq, void *data) -{ - struct vsc_tp *tp = data; - - wake_up(&tp->xfer_wait); - - if (tp->event_notify) - tp->event_notify(tp->event_notify_context); - - return IRQ_HANDLED; -} - static int vsc_tp_match_any(struct acpi_device *adev, void *data) { struct acpi_device **__adev = data; @@ -490,10 +521,9 @@ static int vsc_tp_probe(struct spi_device *spi) tp->spi = spi; irq_set_status_flags(spi->irq, IRQ_DISABLE_UNLAZY); - ret = devm_request_threaded_irq(dev, spi->irq, vsc_tp_isr, - vsc_tp_thread_isr, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - dev_name(dev), tp); + ret = request_threaded_irq(spi->irq, vsc_tp_isr, vsc_tp_thread_isr, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + dev_name(dev), tp); if (ret) return ret; @@ -522,6 +552,8 @@ static int vsc_tp_probe(struct spi_device *spi) err_destroy_lock: mutex_destroy(&tp->mutex); + free_irq(spi->irq, tp); + return ret; } @@ -532,6 +564,8 @@ static void vsc_tp_remove(struct spi_device *spi) platform_device_unregister(tp->pdev); mutex_destroy(&tp->mutex); + + free_irq(spi->irq, tp); } static const struct acpi_device_id vsc_tp_acpi_ids[] = { diff --git a/drivers/misc/mei/vsc-tp.h b/drivers/misc/mei/vsc-tp.h index f9513ddc3e40..14ca195cbddc 100644 --- a/drivers/misc/mei/vsc-tp.h +++ b/drivers/misc/mei/vsc-tp.h @@ -37,6 +37,9 @@ int vsc_tp_xfer(struct vsc_tp *tp, u8 cmd, const void *obuf, size_t olen, int vsc_tp_register_event_cb(struct vsc_tp *tp, vsc_tp_event_cb_t event_cb, void *context); +int vsc_tp_request_irq(struct vsc_tp *tp); +void vsc_tp_free_irq(struct vsc_tp *tp); + void vsc_tp_intr_enable(struct vsc_tp *tp); void vsc_tp_intr_disable(struct vsc_tp *tp); void vsc_tp_intr_synchronize(struct vsc_tp *tp); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 64a3492e8002..90c51b12148e 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -413,7 +413,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( struct mmc_blk_ioc_data *idata; int err; - idata = kmalloc(sizeof(*idata), GFP_KERNEL); + idata = kzalloc(sizeof(*idata), GFP_KERNEL); if (!idata) { err = -ENOMEM; goto out; @@ -488,7 +488,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->flags & MMC_BLK_IOC_DROP) return 0; - if (idata->flags & MMC_BLK_IOC_SBC) + if (idata->flags & MMC_BLK_IOC_SBC && i > 0) prev_idata = idatas[i - 1]; /* diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index 088f8ed4fdc4..a8ee0df47148 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -1114,10 +1114,25 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on, host = slot->host; - if (slot->vsd) - gpiod_set_value(slot->vsd, power_on); - if (slot->vio) - gpiod_set_value(slot->vio, power_on); + if (power_on) { + if (slot->vsd) { + gpiod_set_value(slot->vsd, power_on); + msleep(1); + } + if (slot->vio) { + gpiod_set_value(slot->vio, power_on); + msleep(1); + } + } else { + if (slot->vio) { + gpiod_set_value(slot->vio, power_on); + msleep(50); + } + if (slot->vsd) { + gpiod_set_value(slot->vsd, power_on); + msleep(50); + } + } if (slot->pdata->set_power != NULL) slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on, @@ -1254,18 +1269,18 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id) slot->pdata = &host->pdata->slots[id]; /* Check for some optional GPIO controls */ - slot->vsd = gpiod_get_index_optional(host->dev, "vsd", - id, GPIOD_OUT_LOW); + slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd", + id, GPIOD_OUT_LOW); if (IS_ERR(slot->vsd)) return dev_err_probe(host->dev, PTR_ERR(slot->vsd), "error looking up VSD GPIO\n"); - slot->vio = gpiod_get_index_optional(host->dev, "vio", - id, GPIOD_OUT_LOW); + slot->vio = devm_gpiod_get_index_optional(host->dev, "vio", + id, GPIOD_OUT_LOW); if (IS_ERR(slot->vio)) return dev_err_probe(host->dev, PTR_ERR(slot->vio), "error looking up VIO GPIO\n"); - slot->cover = gpiod_get_index_optional(host->dev, "cover", - id, GPIOD_IN); + slot->cover = devm_gpiod_get_index_optional(host->dev, "cover", + id, GPIOD_IN); if (IS_ERR(slot->cover)) return dev_err_probe(host->dev, PTR_ERR(slot->cover), "error looking up cover switch GPIO\n"); @@ -1379,13 +1394,6 @@ static int mmc_omap_probe(struct platform_device *pdev) if (IS_ERR(host->virt_base)) return PTR_ERR(host->virt_base); - host->slot_switch = gpiod_get_optional(host->dev, "switch", - GPIOD_OUT_LOW); - if (IS_ERR(host->slot_switch)) - return dev_err_probe(host->dev, PTR_ERR(host->slot_switch), - "error looking up slot switch GPIO\n"); - - INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work); INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work); @@ -1404,6 +1412,12 @@ static int mmc_omap_probe(struct platform_device *pdev) host->dev = &pdev->dev; platform_set_drvdata(pdev, host); + host->slot_switch = devm_gpiod_get_optional(host->dev, "switch", + GPIOD_OUT_LOW); + if (IS_ERR(host->slot_switch)) + return dev_err_probe(host->dev, PTR_ERR(host->slot_switch), + "error looking up slot switch GPIO\n"); + host->id = pdev->id; host->irq = irq; host->phys_base = res->start; diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index ab4b964d4058..1d8f5a76096a 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -999,6 +999,17 @@ free_pltfm: return err; } +static void dwcmshc_disable_card_clk(struct sdhci_host *host) +{ + u16 ctrl; + + ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + if (ctrl & SDHCI_CLOCK_CARD_EN) { + ctrl &= ~SDHCI_CLOCK_CARD_EN; + sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL); + } +} + static void dwcmshc_remove(struct platform_device *pdev) { struct sdhci_host *host = platform_get_drvdata(pdev); @@ -1006,8 +1017,14 @@ static void dwcmshc_remove(struct platform_device *pdev) struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host); struct rk35xx_priv *rk_priv = priv->priv; + pm_runtime_get_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + sdhci_remove_host(host, 0); + dwcmshc_disable_card_clk(host); + clk_disable_unprepare(pltfm_host->clk); clk_disable_unprepare(priv->bus_clk); if (rk_priv) @@ -1099,17 +1116,6 @@ static void dwcmshc_enable_card_clk(struct sdhci_host *host) } } -static void dwcmshc_disable_card_clk(struct sdhci_host *host) -{ - u16 ctrl; - - ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL); - if (ctrl & SDHCI_CLOCK_CARD_EN) { - ctrl &= ~SDHCI_CLOCK_CARD_EN; - sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL); - } -} - static int dwcmshc_runtime_suspend(struct device *dev) { struct sdhci_host *host = dev_get_drvdata(dev); diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index e78faef67d7a..94076b095571 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1439,6 +1439,9 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + if (host->tuning_mode != SDHCI_TUNING_MODE_3) + mmc_retune_needed(host->mmc); + if (omap_host->con != -EINVAL) sdhci_runtime_suspend_host(host); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 97a00ec9a4d4..caacdc0a3819 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->bdev_file) { invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1); - fput(dev->bdev_file); + bdev_fput(dev->bdev_file); } kfree(dev); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 767f66c37f6b..8090390edaf9 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -950,20 +950,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface) mutex_unlock(&priv->reg_mutex); } -/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std - * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA - * must only be propagated to C-VLAN and MAC Bridge components. That means - * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports, - * these frames are supposed to be processed by the CPU (software). So we make - * the switch only forward them to the CPU port. And if received from a CPU - * port, forward to a single port. The software is responsible of making the - * switch conform to the latter by setting a single port as destination port on - * the special tag. +/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL) + * of the Open Systems Interconnection basic reference model (OSI/RM) are + * described; the medium access control (MAC) and logical link control (LLC) + * sublayers. The MAC sublayer is the one facing the physical layer. * - * This switch intellectual property cannot conform to this part of the standard - * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC - * DAs, it also includes :22-FF which the scope of propagation is not supposed - * to be restricted for these MAC DAs. + * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A + * Bridge component comprises a MAC Relay Entity for interconnecting the Ports + * of the Bridge, at least two Ports, and higher layer entities with at least a + * Spanning Tree Protocol Entity included. + * + * Each Bridge Port also functions as an end station and shall provide the MAC + * Service to an LLC Entity. Each instance of the MAC Service is provided to a + * distinct LLC Entity that supports protocol identification, multiplexing, and + * demultiplexing, for protocol data unit (PDU) transmission and reception by + * one or more higher layer entities. + * + * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC + * Entity associated with each Bridge Port is modeled as being directly + * connected to the attached Local Area Network (LAN). + * + * On the switch with CPU port architecture, CPU port functions as Management + * Port, and the Management Port functionality is provided by software which + * functions as an end station. Software is connected to an IEEE 802 LAN that is + * wholly contained within the system that incorporates the Bridge. Software + * provides access to the LLC Entity associated with each Bridge Port by the + * value of the source port field on the special tag on the frame received by + * software. + * + * We call frames that carry control information to determine the active + * topology and current extent of each Virtual Local Area Network (VLAN), i.e., + * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration + * Protocol Data Units (MVRPDUs), and frames from other link constrained + * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and + * Link Layer Discovery Protocol (LLDP), link-local frames. They are not + * forwarded by a Bridge. Permanently configured entries in the filtering + * database (FDB) ensure that such frames are discarded by the Forwarding + * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail: + * + * Each of the reserved MAC addresses specified in Table 8-1 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be + * permanently configured in the FDB in C-VLAN components and ERs. + * + * Each of the reserved MAC addresses specified in Table 8-2 + * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently + * configured in the FDB in S-VLAN components. + * + * Each of the reserved MAC addresses specified in Table 8-3 + * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in + * TPMR components. + * + * The FDB entries for reserved MAC addresses shall specify filtering for all + * Bridge Ports and all VIDs. Management shall not provide the capability to + * modify or remove entries for reserved MAC addresses. + * + * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of + * propagation of PDUs within a Bridged Network, as follows: + * + * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no + * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN) + * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward. + * PDUs transmitted using this destination address, or any other addresses + * that appear in Table 8-1, Table 8-2, and Table 8-3 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can + * therefore travel no further than those stations that can be reached via a + * single individual LAN from the originating station. + * + * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an + * address that no conformant S-VLAN component, C-VLAN component, or MAC + * Bridge can forward; however, this address is relayed by a TPMR component. + * PDUs using this destination address, or any of the other addresses that + * appear in both Table 8-1 and Table 8-2 but not in Table 8-3 + * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by + * any TPMRs but will propagate no further than the nearest S-VLAN component, + * C-VLAN component, or MAC Bridge. + * + * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address + * that no conformant C-VLAN component, MAC Bridge can forward; however, it is + * relayed by TPMR components and S-VLAN components. PDUs using this + * destination address, or any of the other addresses that appear in Table 8-1 + * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]), + * will be relayed by TPMR components and S-VLAN components but will propagate + * no further than the nearest C-VLAN component or MAC Bridge. + * + * Because the LLC Entity associated with each Bridge Port is provided via CPU + * port, we must not filter these frames but forward them to CPU port. + * + * In a Bridge, the transmission Port is majorly decided by ingress and egress + * rules, FDB, and spanning tree Port State functions of the Forwarding Process. + * For link-local frames, only CPU port should be designated as destination port + * in the FDB, and the other functions of the Forwarding Process must not + * interfere with the decision of the transmission Port. We call this process + * trapping frames to CPU port. + * + * Therefore, on the switch with CPU port architecture, link-local frames must + * be trapped to CPU port, and certain link-local frames received by a Port of a + * Bridge comprising a TPMR component or an S-VLAN component must be excluded + * from it. + * + * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port + * MAC Relay (TPMR) component as a TPMR component supports only a subset of the + * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port + * doesn't count) of this architecture will either function as a standard MAC + * Bridge or a standard VLAN Bridge. + * + * Therefore, a Bridge of this architecture can only comprise S-VLAN components, + * C-VLAN components, or MAC Bridge components. Since there's no TPMR component, + * we don't need to relay PDUs using the destination addresses specified on the + * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge + * section where they must be relayed by TPMR components. + * + * One option to trap link-local frames to CPU port is to add static FDB entries + * with CPU port designated as destination port. However, because that + * Independent VLAN Learning (IVL) is being used on every VID, each entry only + * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC + * Bridge component or a C-VLAN component, there would have to be 16 times 4096 + * entries. This switch intellectual property can only hold a maximum of 2048 + * entries. Using this option, there also isn't a mechanism to prevent + * link-local frames from being discarded when the spanning tree Port State of + * the reception Port is discarding. + * + * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4 + * registers. Whilst this applies to every VID, it doesn't contain all of the + * reserved MAC addresses without affecting the remaining Standard Group MAC + * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the + * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination + * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF + * destination addresses which may be relayed by MAC Bridges or VLAN Bridges. + * The latter option provides better but not complete conformance. + * + * This switch intellectual property also does not provide a mechanism to trap + * link-local frames with specific destination addresses to CPU port by Bridge, + * to conform to the filtering rules for the distinct Bridge components. + * + * Therefore, regardless of the type of the Bridge component, link-local frames + * with these destination addresses will be trapped to CPU port: + * + * 01-80-C2-00-00-[00,01,02,03,0E] + * + * In a Bridge comprising a MAC Bridge component or a C-VLAN component: + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] + * + * In a Bridge comprising an S-VLAN component: + * + * Link-local frames with these destination addresses will be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-00 + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A] + * + * To trap link-local frames to CPU port as conformant as this switch + * intellectual property can allow, link-local frames are made to be regarded as + * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual + * property only lets the frames regarded as BPDUs bypass the spanning tree Port + * State function of the Forwarding Process. + * + * The only remaining interference is the ingress rules. When the reception Port + * has no PVID assigned on software, VLAN-untagged frames won't be allowed in. + * There doesn't seem to be a mechanism on the switch intellectual property to + * have link-local frames bypass this function of the Forwarding Process. */ static void mt753x_trap_frames(struct mt7530_priv *priv) @@ -971,35 +1124,43 @@ mt753x_trap_frames(struct mt7530_priv *priv) /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them * VLAN-untagged. */ - mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK | - MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | - MT753X_BPDU_PORT_FW_MASK, - MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_BPC, + MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK | + MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | + MT753X_BPDU_PORT_FW_MASK, + MT753X_PAE_BPDU_FR | + MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress * them VLAN-untagged. */ - mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK | - MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK | - MT753X_R01_PORT_FW_MASK, - MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC1, + MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK | + MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK, + MT753X_R02_BPDU_FR | + MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress * them VLAN-untagged. */ - mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | - MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK | - MT753X_R03_PORT_FW_MASK, - MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC2, + MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK | + MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK, + MT753X_R0E_BPDU_FR | + MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); } static void @@ -1722,14 +1883,16 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, static int mt753x_mirror_port_get(unsigned int id, u32 val) { - return (id == ID_MT7531) ? MT7531_MIRROR_PORT_GET(val) : - MIRROR_PORT(val); + return (id == ID_MT7531 || id == ID_MT7988) ? + MT7531_MIRROR_PORT_GET(val) : + MIRROR_PORT(val); } static int mt753x_mirror_port_set(unsigned int id, u32 val) { - return (id == ID_MT7531) ? MT7531_MIRROR_PORT_SET(val) : - MIRROR_PORT(val); + return (id == ID_MT7531 || id == ID_MT7988) ? + MT7531_MIRROR_PORT_SET(val) : + MIRROR_PORT(val); } static int mt753x_port_mirror_add(struct dsa_switch *ds, int port, @@ -2268,8 +2431,6 @@ mt7530_setup(struct dsa_switch *ds) SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); - mt7530_pll_setup(priv); - /* Lower Tx driving for TRGMII path */ for (i = 0; i < NUM_TRGMII_CTRL; i++) mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), @@ -2285,6 +2446,9 @@ mt7530_setup(struct dsa_switch *ds) val |= MHWTRAP_MANUAL; mt7530_write(priv, MT7530_MHWTRAP, val); + if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_40MHZ) + mt7530_pll_setup(priv); + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ @@ -2318,6 +2482,9 @@ mt7530_setup(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + /* Allow mirroring frames received on the local port (monitor port). */ + mt7530_set(priv, MT753X_AGC, LOCAL_EN); + /* Setup VLAN ID 0 for VLAN-unaware bridges */ ret = mt7530_setup_vlan0(priv); if (ret) @@ -2429,6 +2596,9 @@ mt7531_setup_common(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + /* Allow mirroring frames received on the local port (monitor port). */ + mt7530_set(priv, MT753X_AGC, LOCAL_EN); + /* Flush the FDB table */ ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); if (ret < 0) @@ -2504,18 +2674,25 @@ mt7531_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK, MT7531_GPIO0_INTERRUPT); - /* Enable PHY core PLL, since phy_device has not yet been created - * provided for phy_[read,write]_mmd_indirect is called, we provide - * our own mt7531_ind_mmd_phy_[read,write] to complete this - * function. + /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since + * phy_device has not yet been created provided for + * phy_[read,write]_mmd_indirect is called, we provide our own + * mt7531_ind_mmd_phy_[read,write] to complete this function. */ val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, CORE_PLL_GROUP4); - val |= MT7531_PHY_PLL_BYPASS_MODE; + val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE; val &= ~MT7531_PHY_PLL_OFF; mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, CORE_PLL_GROUP4, val); + /* Disable EEE advertisement on the switch PHYs. */ + for (i = MT753X_CTRL_PHY_ADDR; + i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) { + mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV, + 0); + } + mt7531_setup_common(ds); /* Setup VLAN ID 0 for VLAN-unaware bridges */ diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index d17b318e6ee4..a08053390b28 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -32,6 +32,10 @@ enum mt753x_id { #define SYSC_REG_RSTCTRL 0x34 #define RESET_MCM BIT(2) +/* Register for ARL global control */ +#define MT753X_AGC 0xc +#define LOCAL_EN BIT(7) + /* Registers to mac forward control for unknown frames */ #define MT7530_MFC 0x10 #define BC_FFP(x) (((x) & 0xff) << 24) @@ -65,6 +69,7 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 +#define MT753X_PAE_BPDU_FR BIT(25) #define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22) #define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x) #define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) @@ -75,20 +80,24 @@ enum mt753x_id { /* Register for :01 and :02 MAC DA frame control */ #define MT753X_RGAC1 0x28 +#define MT753X_R02_BPDU_FR BIT(25) #define MT753X_R02_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x) #define MT753X_R02_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x) +#define MT753X_R01_BPDU_FR BIT(9) #define MT753X_R01_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x) #define MT753X_R01_PORT_FW_MASK GENMASK(2, 0) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c +#define MT753X_R0E_BPDU_FR BIT(25) #define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) +#define MT753X_R03_BPDU_FR BIT(9) #define MT753X_R03_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x) #define MT753X_R03_PORT_FW_MASK GENMASK(2, 0) @@ -616,6 +625,7 @@ enum mt7531_clk_skew { #define RG_SYSPLL_DDSFBK_EN BIT(12) #define RG_SYSPLL_BIAS_EN BIT(11) #define RG_SYSPLL_BIAS_LPF_EN BIT(10) +#define MT7531_RG_SYSPLL_DMY2 BIT(6) #define MT7531_PHY_PLL_OFF BIT(5) #define MT7531_PHY_PLL_BYPASS_MODE BIT(4) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9ed1821184ec..c95787cb9086 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5503,8 +5503,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .family = MV88E6XXX_FAMILY_6250, .name = "Marvell 88E6020", .num_databases = 64, - .num_ports = 4, + /* Ports 2-4 are not routed to pins + * => usable ports 0, 1, 5, 6 + */ + .num_ports = 7, .num_internal_phys = 2, + .invalid_port_mask = BIT(2) | BIT(3) | BIT(4), .max_vid = 4095, .port_base_addr = 0x8, .phy_base_addr = 0x0, diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 833e55e4b961..52ddb4ef259e 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg) return tmp & 0xffff; } -int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd, +int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg, u16 val) { struct sja1105_mdio_private *mdio_priv = bus->priv; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 9e9e4a03f1a8..2d8a66ea82fa 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -351,7 +351,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, ENA_COM_BOUNCE_BUFFER_CNTRL_CNT; io_sq->bounce_buf_ctrl.next_to_use = 0; - size = io_sq->bounce_buf_ctrl.buffer_size * + size = (size_t)io_sq->bounce_buf_ctrl.buffer_size * io_sq->bounce_buf_ctrl.buffers_num; dev_node = dev_to_node(ena_dev->dmadev); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 09e7da1a69c9..be5acfa41ee0 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -718,8 +718,11 @@ void ena_unmap_tx_buff(struct ena_ring *tx_ring, static void ena_free_tx_bufs(struct ena_ring *tx_ring) { bool print_once = true; + bool is_xdp_ring; u32 i; + is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid); + for (i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; @@ -739,10 +742,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) ena_unmap_tx_buff(tx_ring, tx_info); - dev_kfree_skb_any(tx_info->skb); + if (is_xdp_ring) + xdp_return_frame(tx_info->xdpf); + else + dev_kfree_skb_any(tx_info->skb); } - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->qid)); + + if (!is_xdp_ring) + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); } static void ena_free_all_tx_bufs(struct ena_adapter *adapter) @@ -3481,10 +3489,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter) { struct ena_ring *tx_ring; struct ena_ring *rx_ring; - int i, budget, rc; + int qid, budget, rc; int io_queue_count; io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; + /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -3497,27 +3506,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) return; - budget = ENA_MONITORED_TX_QUEUES; + budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES); - for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { - tx_ring = &adapter->tx_ring[i]; - rx_ring = &adapter->rx_ring[i]; + qid = adapter->last_monitored_tx_qid; + + while (budget) { + qid = (qid + 1) % io_queue_count; + + tx_ring = &adapter->tx_ring[qid]; + rx_ring = &adapter->rx_ring[qid]; rc = check_missing_comp_in_tx_queue(adapter, tx_ring); if (unlikely(rc)) return; - rc = !ENA_IS_XDP_INDEX(adapter, i) ? + rc = !ENA_IS_XDP_INDEX(adapter, qid) ? check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; budget--; - if (!budget) - break; } - adapter->last_monitored_tx_qid = i % io_queue_count; + adapter->last_monitored_tx_qid = qid; } /* trigger napi schedule after 2 consecutive detections */ diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c index 337c435d3ce9..5b175e7e92a1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_xdp.c +++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c @@ -89,7 +89,7 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring, rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx); if (unlikely(rc)) - return rc; + goto err; ena_tx_ctx.req_id = req_id; @@ -112,7 +112,9 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring, error_unmap_dma: ena_unmap_tx_buff(tx_ring, tx_info); +err: tx_info->xdpf = NULL; + return rc; } diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 9662ee72814c..536635e57727 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -593,6 +593,16 @@ err_out: pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY); } +void pdsc_pci_reset_thread(struct work_struct *work) +{ + struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work); + struct pci_dev *pdev = pdsc->pdev; + + pci_dev_get(pdev); + pci_reset_function(pdev); + pci_dev_put(pdev); +} + static void pdsc_check_pci_health(struct pdsc *pdsc) { u8 fw_status; @@ -607,7 +617,8 @@ static void pdsc_check_pci_health(struct pdsc *pdsc) if (fw_status != PDS_RC_BAD_PCI) return; - pci_reset_function(pdsc->pdev); + /* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */ + queue_work(pdsc->wq, &pdsc->pci_reset_work); } void pdsc_health_thread(struct work_struct *work) diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 92d7657dd614..a3e17a0c187a 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -197,6 +197,7 @@ struct pdsc { struct pdsc_qcq notifyqcq; u64 last_eid; struct pdsc_viftype *viftype_status; + struct work_struct pci_reset_work; }; /** enum pds_core_dbell_bits - bitwise composition of dbell values. @@ -313,5 +314,6 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw, void pdsc_fw_down(struct pdsc *pdsc); void pdsc_fw_up(struct pdsc *pdsc); +void pdsc_pci_reset_thread(struct work_struct *work); #endif /* _PDSC_H_ */ diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index e494e1298dc9..495ef4ef8c10 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -229,6 +229,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc) .reset.opcode = PDS_CORE_CMD_RESET, }; + if (!pdsc_is_fw_running(pdsc)) + return 0; + return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout); } diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index ab6133e7db42..660268ff9562 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -239,6 +239,7 @@ static int pdsc_init_pf(struct pdsc *pdsc) snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid); pdsc->wq = create_singlethread_workqueue(wq_name); INIT_WORK(&pdsc->health_work, pdsc_health_thread); + INIT_WORK(&pdsc->pci_reset_work, pdsc_pci_reset_thread); timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0); pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ; diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index dd06b68b33ed..72ea97c5d5d4 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -392,7 +392,9 @@ static void umac_reset(struct bcmasp_intf *intf) umac_wl(intf, 0x0, UMC_CMD); umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD); usleep_range(10, 100); - umac_wl(intf, 0x0, UMC_CMD); + /* We hold the umac in reset and bring it out of + * reset when phy link is up. + */ } static void umac_set_hw_addr(struct bcmasp_intf *intf, @@ -412,6 +414,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask, u32 reg; reg = umac_rl(intf, UMC_CMD); + if (reg & UMC_CMD_SW_RESET) + return; if (enable) reg |= mask; else @@ -430,7 +434,6 @@ static void umac_init(struct bcmasp_intf *intf) umac_wl(intf, 0x800, UMC_FRM_LEN); umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL); umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ); - umac_enable_set(intf, UMC_CMD_PROMISC, 1); } static int bcmasp_tx_poll(struct napi_struct *napi, int budget) @@ -658,6 +661,12 @@ static void bcmasp_adj_link(struct net_device *dev) UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE | UMC_CMD_TX_PAUSE_IGNORE); reg |= cmd_bits; + if (reg & UMC_CMD_SW_RESET) { + reg &= ~UMC_CMD_SW_RESET; + umac_wl(intf, reg, UMC_CMD); + udelay(2); + reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC; + } umac_wl(intf, reg, UMC_CMD); active = phy_init_eee(phydev, 0) >= 0; @@ -1035,19 +1044,12 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) /* Indicate that the MAC is responsible for PHY PM */ phydev->mac_managed_pm = true; - } else if (!intf->wolopts) { - ret = phy_resume(dev->phydev); - if (ret) - goto err_phy_disable; } umac_reset(intf); umac_init(intf); - /* Disable the UniMAC RX/TX */ - umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0); - umac_set_hw_addr(intf, dev->dev_addr); intf->old_duplex = -1; @@ -1062,9 +1064,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll); bcmasp_enable_rx(intf, 1); - /* Turn on UniMAC TX/RX */ - umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1); - intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD); bcmasp_netif_start(dev); @@ -1306,7 +1305,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf) if (intf->wolopts & WAKE_FILTER) bcmasp_netfilt_suspend(intf); - /* UniMAC receive needs to be turned on */ + /* Bring UniMAC out of reset if needed and enable RX */ + reg = umac_rl(intf, UMC_CMD); + if (reg & UMC_CMD_SW_RESET) + reg &= ~UMC_CMD_SW_RESET; + + reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC; + umac_wl(intf, reg, UMC_CMD); + umac_enable_set(intf, UMC_CMD_RX_EN, 1); if (intf->parent->wol_irq > 0) { @@ -1324,7 +1330,6 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf) { struct device *kdev = &intf->parent->pdev->dev; struct net_device *dev = intf->ndev; - int ret = 0; if (!netif_running(dev)) return 0; @@ -1334,10 +1339,6 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf) bcmasp_netif_deinit(dev); if (!intf->wolopts) { - ret = phy_suspend(dev->phydev); - if (ret) - goto out; - if (intf->internal_phy) bcmasp_ephy_enable_set(intf, false); else @@ -1354,11 +1355,7 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf) clk_disable_unprepare(intf->parent->clk); - return ret; - -out: - bcmasp_netif_init(dev, false); - return ret; + return 0; } static void bcmasp_resume_from_wol(struct bcmasp_intf *intf) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 493b724848c8..57e61f963167 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11758,6 +11758,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); + if (bp->ptp_cfg) + atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); bnxt_cfg_usr_fltrs(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 93f9bd55020f..195c02dc0683 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -210,6 +210,9 @@ void bnxt_ulp_start(struct bnxt *bp, int err) if (err) return; + if (edev->ulp_tbl->msix_requested) + bnxt_fill_msix_vecs(bp, edev->msix_entries); + if (aux_priv) { struct auxiliary_device *adev; @@ -392,12 +395,13 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp) if (!edev) goto aux_dev_uninit; + aux_priv->edev = edev; + ulp = kzalloc(sizeof(*ulp), GFP_KERNEL); if (!ulp) goto aux_dev_uninit; edev->ulp_tbl = ulp; - aux_priv->edev = edev; bp->edev = edev; bnxt_set_edev_info(edev, bp); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 7396e2823e32..b1f84b37032a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3280,7 +3280,7 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv, } /* Returns a reusable dma control register value */ -static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) +static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx) { unsigned int i; u32 reg; @@ -3305,6 +3305,14 @@ static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) udelay(10); bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH); + if (flush_rx) { + reg = bcmgenet_rbuf_ctrl_get(priv); + bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0)); + udelay(10); + bcmgenet_rbuf_ctrl_set(priv, reg); + udelay(10); + } + return dma_ctrl; } @@ -3368,8 +3376,8 @@ static int bcmgenet_open(struct net_device *dev) bcmgenet_set_hw_addr(priv, dev->dev_addr); - /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv); + /* Disable RX/TX DMA and flush TX and RX queues */ + dma_ctrl = bcmgenet_dma_disable(priv, true); /* Reinitialize TDMA and RDMA and SW housekeeping */ ret = bcmgenet_init_dma(priv); @@ -4235,7 +4243,7 @@ static int bcmgenet_resume(struct device *d) bcmgenet_hfb_create_rxnfc_filter(priv, rule); /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv); + dma_ctrl = bcmgenet_dma_disable(priv, false); /* Reinitialize TDMA and RDMA and SW housekeeping */ ret = bcmgenet_init_dma(priv); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d7693fdf640d..8bd213da8fb6 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2454,8 +2454,6 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - phy_dev->mac_managed_pm = true; - phy_attached_info(phy_dev); return 0; @@ -2467,10 +2465,12 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); bool suppress_preamble = false; + struct phy_device *phydev; struct device_node *node; int err = -ENXIO; u32 mii_speed, holdtime; u32 bus_freq; + int addr; /* * The i.MX28 dual fec interfaces are not equal. @@ -2584,6 +2584,13 @@ static int fec_enet_mii_init(struct platform_device *pdev) goto err_out_free_mdiobus; of_node_put(node); + /* find all the PHY devices on the bus and set mac_managed_pm to true */ + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + phydev = mdiobus_get_phy(fep->mii_bus, addr); + if (phydev) + phydev->mac_managed_pm = true; + } + mii_cnt++; /* save fec0 mii_bus */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c index f3c9395d8351..618f66d9586b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c @@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS, true); - desc.data[0] = cpu_to_le32(tqp->index & 0x1ff); + desc.data[0] = cpu_to_le32(tqp->index); ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 999a0ee162a6..941cb529d671 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 +#define HNS3_NIC_LB_TEST_UNEXECUTED 4 + +static int hns3_get_sset_count(struct net_device *netdev, int stringset); static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) { @@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev, static void hns3_self_test(struct net_device *ndev, struct ethtool_test *eth_test, u64 *data) { + int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST); struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; int st_param[HNAE3_LOOP_NONE][2]; bool if_running = netif_running(ndev); + int i; + + /* initialize the loopback test result, avoid marking an unexcuted + * loopback test as PASS. + */ + for (i = 0; i < cnt; i++) + data[i] = HNS3_NIC_LB_TEST_UNEXECUTED; if (hns3_nic_resetting(ndev)) { netdev_err(ndev, "dev resetting!"); - return; + goto failure; } if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) - return; + goto failure; if (netif_msg_ifdown(h)) netdev_info(ndev, "self test start\n"); @@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev, if (netif_msg_ifdown(h)) netdev_info(ndev, "self test end\n"); + return; + +failure: + eth_test->flags |= ETH_TEST_FL_FAILED; } static void hns3_update_limit_promisc_mode(struct net_device *netdev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index b4afb66efe5c..ff6a2ed23ddb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11626,6 +11626,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_pci_uninit; + devl_lock(hdev->devlink); + /* Firmware command queue initialize */ ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) @@ -11805,6 +11807,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + devl_unlock(hdev->devlink); return 0; err_mdiobus_unreg: @@ -11817,6 +11820,7 @@ err_msi_uninit: err_cmd_uninit: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); err_devlink_uninit: + devl_unlock(hdev->devlink); hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.hw.io_base); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 1fef6bb5a5fb..4b6e7536170a 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -628,6 +628,7 @@ struct e1000_phy_info { u32 id; u32 reset_delay_us; /* in usec */ u32 revision; + u32 retry_count; enum e1000_media_type media_type; @@ -644,6 +645,7 @@ struct e1000_phy_info { bool polarity_correction; bool speed_downgraded; bool autoneg_wait_to_complete; + bool retry_enabled; }; struct e1000_nvm_info { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 19e450a5bd31..f9e94be36e97 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -222,11 +222,18 @@ out: if (hw->mac.type >= e1000_pch_lpt) { /* Only unforce SMBus if ME is not active */ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* Unforce SMBus mode in PHY */ e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg); phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg); + e1000e_enable_phy_retry(hw); + /* Unforce SMBus mode in MAC */ mac_reg = er32(CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; @@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) goto out; } + /* There is no guarantee that the PHY is accessible at this time + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is * inaccessible and resetting the PHY is not blocked, toggle the * LANPHYPC Value bit to force the interconnect to PCIe mode. @@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) break; } + e1000e_enable_phy_retry(hw); + hw->phy.ops.release(hw); if (!ret_val) { @@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) phy->id = e1000_phy_unknown; + if (hw->mac.type == e1000_pch_mtp) { + phy->retry_count = 2; + e1000e_enable_phy_retry(hw); + } + ret_val = e1000_init_phy_workarounds_pchlan(hw); if (ret_val) return ret_val; @@ -1146,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (ret_val) goto out; - /* Force SMBus mode in PHY */ - ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); - if (ret_val) - goto release; - phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; - e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); - - /* Force SMBus mode in MAC */ - mac_reg = er32(CTRL_EXT); - mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_reg); - /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable * LPLU and disable Gig speed when entering ULP */ @@ -1313,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) /* Toggle LANPHYPC Value bit */ e1000_toggle_lanphypc_pch_lpt(hw); + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* Unforce SMBus mode in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) { @@ -1333,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); + e1000e_enable_phy_retry(hw); + /* Unforce SMBus mode in MAC */ mac_reg = er32(CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index cc8c531ec3df..3692fce20195 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6623,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) struct e1000_hw *hw = &adapter->hw; u32 ctrl, ctrl_ext, rctl, status, wufc; int retval = 0; + u16 smb_ctrl; /* Runtime suspend should only enable wakeup for link changes */ if (runtime) @@ -6696,6 +6697,23 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) if (retval) return retval; } + + /* Force SMBUS to allow WOL */ + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + + e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl); + smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl); + + e1000e_enable_phy_retry(hw); + + /* Force SMBus mode in MAC */ + ctrl_ext = er32(CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, ctrl_ext); } /* Ensure that the appropriate bits are set in LPI_CTRL diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 5e329156d1ba..93544f1cc2a5 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0); } +void e1000e_disable_phy_retry(struct e1000_hw *hw) +{ + hw->phy.retry_enabled = false; +} + +void e1000e_enable_phy_retry(struct e1000_hw *hw) +{ + hw->phy.retry_enabled = true; +} + /** * e1000e_read_phy_reg_mdic - Read MDI control register * @hw: pointer to the HW structure @@ -118,55 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) **/ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) { + u32 i, mdic = 0, retry_counter, retry_max; struct e1000_phy_info *phy = &hw->phy; - u32 i, mdic = 0; + bool success; if (offset > MAX_PHY_REG_ADDRESS) { e_dbg("PHY Address %d is out of range\n", offset); return -E1000_ERR_PARAM; } + retry_max = phy->retry_enabled ? phy->retry_count : 0; + /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ - mdic = ((offset << E1000_MDIC_REG_SHIFT) | - (phy->addr << E1000_MDIC_PHY_SHIFT) | - (E1000_MDIC_OP_READ)); + for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { + success = true; - ew32(MDIC, mdic); + mdic = ((offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_READ)); - /* Poll the ready bit to see if the MDI read completed - * Increasing the time out as testing showed failures with - * the lower time out - */ - for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - udelay(50); - mdic = er32(MDIC); - if (mdic & E1000_MDIC_READY) - break; - } - if (!(mdic & E1000_MDIC_READY)) { - e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset); - return -E1000_ERR_PHY; - } - if (mdic & E1000_MDIC_ERROR) { - e_dbg("MDI Read PHY Reg Address %d Error\n", offset); - return -E1000_ERR_PHY; - } - if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { - e_dbg("MDI Read offset error - requested %d, returned %d\n", - offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); - return -E1000_ERR_PHY; - } - *data = (u16)mdic; + ew32(MDIC, mdic); - /* Allow some time after each MDIC transaction to avoid - * reading duplicate data in the next MDIC transaction. - */ - if (hw->mac.type == e1000_pch2lan) - udelay(100); - return 0; + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usleep_range(50, 60); + mdic = er32(MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + e_dbg("MDI Read PHY Reg Address %d did not complete\n", + offset); + success = false; + } + if (mdic & E1000_MDIC_ERROR) { + e_dbg("MDI Read PHY Reg Address %d Error\n", offset); + success = false; + } + if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { + e_dbg("MDI Read offset error - requested %d, returned %d\n", + offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); + success = false; + } + + /* Allow some time after each MDIC transaction to avoid + * reading duplicate data in the next MDIC transaction. + */ + if (hw->mac.type == e1000_pch2lan) + usleep_range(100, 150); + + if (success) { + *data = (u16)mdic; + return 0; + } + + if (retry_counter != retry_max) { + e_dbg("Perform retry on PHY transaction...\n"); + mdelay(10); + } + } + + return -E1000_ERR_PHY; } /** @@ -179,56 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) **/ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) { + u32 i, mdic = 0, retry_counter, retry_max; struct e1000_phy_info *phy = &hw->phy; - u32 i, mdic = 0; + bool success; if (offset > MAX_PHY_REG_ADDRESS) { e_dbg("PHY Address %d is out of range\n", offset); return -E1000_ERR_PARAM; } + retry_max = phy->retry_enabled ? phy->retry_count : 0; + /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ - mdic = (((u32)data) | - (offset << E1000_MDIC_REG_SHIFT) | - (phy->addr << E1000_MDIC_PHY_SHIFT) | - (E1000_MDIC_OP_WRITE)); + for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { + success = true; - ew32(MDIC, mdic); + mdic = (((u32)data) | + (offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_WRITE)); - /* Poll the ready bit to see if the MDI read completed - * Increasing the time out as testing showed failures with - * the lower time out - */ - for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - udelay(50); - mdic = er32(MDIC); - if (mdic & E1000_MDIC_READY) - break; - } - if (!(mdic & E1000_MDIC_READY)) { - e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset); - return -E1000_ERR_PHY; - } - if (mdic & E1000_MDIC_ERROR) { - e_dbg("MDI Write PHY Red Address %d Error\n", offset); - return -E1000_ERR_PHY; - } - if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { - e_dbg("MDI Write offset error - requested %d, returned %d\n", - offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); - return -E1000_ERR_PHY; + ew32(MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usleep_range(50, 60); + mdic = er32(MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + e_dbg("MDI Write PHY Reg Address %d did not complete\n", + offset); + success = false; + } + if (mdic & E1000_MDIC_ERROR) { + e_dbg("MDI Write PHY Reg Address %d Error\n", offset); + success = false; + } + if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { + e_dbg("MDI Write offset error - requested %d, returned %d\n", + offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); + success = false; + } + + /* Allow some time after each MDIC transaction to avoid + * reading duplicate data in the next MDIC transaction. + */ + if (hw->mac.type == e1000_pch2lan) + usleep_range(100, 150); + + if (success) + return 0; + + if (retry_counter != retry_max) { + e_dbg("Perform retry on PHY transaction...\n"); + mdelay(10); + } } - /* Allow some time after each MDIC transaction to avoid - * reading duplicate data in the next MDIC transaction. - */ - if (hw->mac.type == e1000_pch2lan) - udelay(100); - - return 0; + return -E1000_ERR_PHY; } /** diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h index c48777d09523..049bb325b4b1 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.h +++ b/drivers/net/ethernet/intel/e1000e/phy.h @@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data); s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data); void e1000_power_up_phy_copper(struct e1000_hw *hw); void e1000_power_down_phy_copper(struct e1000_hw *hw); +void e1000e_disable_phy_retry(struct e1000_hw *hw); +void e1000e_enable_phy_retry(struct e1000_hw *hw); s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index ba24f3fa92c3..2fbabcdb5bb5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -955,6 +955,7 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + bool in_busy_poll; int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f86578857e8a..48b9ddb2b1b3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1253,8 +1253,11 @@ int i40e_count_filters(struct i40e_vsi *vsi) int bkt; int cnt = 0; - hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) - ++cnt; + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_ACTIVE) + ++cnt; + } return cnt; } @@ -3911,6 +3914,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; + /* Set ITR for software interrupts triggered after exiting + * busy-loop polling. + */ + wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), + I40E_ITR_20K); + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 14ab642cafdb..432afbb64201 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -333,8 +333,11 @@ #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3 #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5 +#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT) #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24 #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT) +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25 +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT) #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */ #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0 #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 0d7177083708..1a12b732818e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2630,7 +2630,22 @@ process_next: return failure ? budget : (int)total_rx_packets; } -static inline u32 i40e_buildreg_itr(const int type, u16 itr) +/** + * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register + * @itr_idx: interrupt throttling index + * @interval: interrupt throttling interval value in usecs + * @force_swint: force software interrupt + * + * The function builds a value for I40E_PFINT_DYN_CTLN register that + * is used to update interrupt throttling interval for specified ITR index + * and optionally enforces a software interrupt. If the @itr_idx is equal + * to I40E_ITR_NONE then no interval change is applied and only @force_swint + * parameter is taken into account. If the interval change and enforced + * software interrupt are not requested then the built value just enables + * appropriate vector interrupt. + **/ +static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval, + bool force_swint) { u32 val; @@ -2644,23 +2659,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) * an event in the PBA anyway so we need to rely on the automask * to hold pending events for us until the interrupt is re-enabled * - * The itr value is reported in microseconds, and the register - * value is recorded in 2 microsecond units. For this reason we - * only need to shift by the interval shift - 1 instead of the - * full value. + * We have to shift the given value as it is reported in microseconds + * and the register value is recorded in 2 microsecond units. */ - itr &= I40E_ITR_MASK; + interval >>= 1; + /* 1. Enable vector interrupt + * 2. Update the interval for the specified ITR index + * (I40E_ITR_NONE in the register is used to indicate that + * no interval update is requested) + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); + FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) | + FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval); + + /* 3. Enforce software interrupt trigger if requested + * (These software interrupts rate is limited by ITR2 that is + * set to 20K interrupts per second) + */ + if (force_swint) + val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | + I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | + FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK, + I40E_SW_ITR); return val; } -/* a small macro to shorten up some long lines */ -#define INTREG I40E_PFINT_DYN_CTLN - /* The act of updating the ITR will cause it to immediately trigger. In order * to prevent this from throwing off adaptive update statistics we defer the * update so that it can only happen so often. So after either Tx or Rx are @@ -2679,8 +2704,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { + enum i40e_dyn_idx itr_idx = I40E_ITR_NONE; struct i40e_hw *hw = &vsi->back->hw; - u32 intval; + u16 interval = 0; + u32 itr_val; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { @@ -2702,8 +2729,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ if (q_vector->rx.target_itr < q_vector->rx.current_itr) { /* Rx ITR needs to be reduced, this is highest priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || @@ -2712,25 +2739,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, /* Tx ITR needs to be reduced, this is second priority * Tx ITR needs to be increased more than Rx, fourth priority */ - intval = i40e_buildreg_itr(I40E_TX_ITR, - q_vector->tx.target_itr); + itr_idx = I40E_TX_ITR; + interval = q_vector->tx.target_itr; q_vector->tx.current_itr = q_vector->tx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { /* Rx ITR needs to be increased, third priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else { /* No ITR update, lowest priority */ - intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown) q_vector->itr_countdown--; } - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), intval); + /* Do not update interrupt control register if VSI is down */ + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + /* Update ITR interval if necessary and enforce software interrupt + * if we are exiting busy poll. + */ + if (q_vector->in_busy_poll) { + itr_val = i40e_buildreg_itr(itr_idx, interval, true); + q_vector->in_busy_poll = false; + } else { + itr_val = i40e_buildreg_itr(itr_idx, interval, false); + } + wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val); } /** @@ -2845,6 +2883,8 @@ tx_only: */ if (likely(napi_complete_done(napi, work_done))) i40e_update_enable_itr(vsi, q_vector); + else + q_vector->in_busy_poll = true; return min(work_done, budget - 1); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index abf15067eb5d..2cdc7de6301c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -68,6 +68,7 @@ enum i40e_dyn_idx { /* these are indexes into ITRN registers */ #define I40E_RX_ITR I40E_IDX_ITR0 #define I40E_TX_ITR I40E_IDX_ITR1 +#define I40E_SW_ITR I40E_IDX_ITR2 /* Supported RSS offloads */ #define I40E_DEFAULT_RSS_HENA ( \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 83a34e98bdc7..232b65b9c8ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1624,8 +1624,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; - int i, v; u32 reg; + int i; /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) @@ -1636,11 +1636,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) { - vf = &pf->vf[v]; + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is being reset no need to trigger reset again */ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - i40e_trigger_vf_reset(&pf->vf[v], flr); + i40e_trigger_vf_reset(vf, flr); } /* HW requires some time to make sure it can flush the FIFO for a VF @@ -1649,14 +1648,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) * the VFs using a simple iterator that increments once that VF has * finished resetting. */ - for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { + for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) { usleep_range(10000, 20000); /* Check each VF in sequence, beginning with the VF to fail * the previous check. */ - while (v < pf->num_alloc_vfs) { - vf = &pf->vf[v]; + while (vf < &pf->vf[pf->num_alloc_vfs]) { if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) @@ -1666,7 +1664,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If the current VF has finished resetting, move on * to the next VF in sequence. */ - v++; + ++vf; } } @@ -1676,39 +1674,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* Display a warning if at least one VF didn't manage to reset in * time, but continue on with the operation. */ - if (v < pf->num_alloc_vfs) + if (vf < &pf->vf[pf->num_alloc_vfs]) dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", - pf->vf[v].vf_id); + vf->vf_id); usleep_range(10000, 20000); /* Begin disabling all the rings associated with VFs, but do not wait * between each VF. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]); } /* Now that we've notified HW to disable all of the VF rings, wait * until they finish. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]); } /* Hw may need up to 50ms to finish disabling the RX queues. We @@ -1717,12 +1715,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) mdelay(50); /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_cleanup_reset_vf(&pf->vf[v]); + i40e_cleanup_reset_vf(vf); } i40e_flush(hw); @@ -3139,11 +3137,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) /* Allow to delete VF primary MAC only if it was not set * administratively by PF or if VF is trusted. */ - if (ether_addr_equal(addr, vf->default_lan_addr.addr) && - i40e_can_vf_change_mac(vf)) - was_unimac_deleted = true; - else - continue; + if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { + if (i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + } if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 8040317c9561..1f3e7a6903e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -593,8 +593,9 @@ struct ice_aqc_recipe_data_elem { struct ice_aqc_recipe_to_profile { __le16 profile_id; u8 rsvd[6]; - DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES); + __le64 recipe_assoc; }; +static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16); /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index db4b2844e1f7..d9f6cc71d900 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1002,8 +1002,8 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw) */ int ice_init_hw(struct ice_hw *hw) { - struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); - void *mac_buf __free(kfree); + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL; + void *mac_buf __free(kfree) = NULL; u16 mac_buf_len; int status; @@ -3272,7 +3272,7 @@ int ice_update_link_info(struct ice_port_info *pi) return status; if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) { - struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL; pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) @@ -3420,7 +3420,7 @@ ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, int ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) { - struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL; struct ice_aqc_set_phy_cfg_data cfg = { 0 }; struct ice_hw *hw; int status; @@ -3561,7 +3561,7 @@ int ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec) { - struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL; struct ice_hw *hw; int status; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 255a9c8151b4..78b833b3e1d7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -941,11 +941,11 @@ static u64 ice_loopback_test(struct net_device *netdev) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *orig_vsi = np->vsi, *test_vsi; struct ice_pf *pf = orig_vsi->back; + u8 *tx_frame __free(kfree) = NULL; u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; - u8 *tx_frame __free(kfree); int i; netdev_info(netdev, "loopback test\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index f97128b69f87..f0e76f0a6d60 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -2041,7 +2041,7 @@ int ice_init_lag(struct ice_pf *pf) /* associate recipes to profiles */ for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) { err = ice_aq_get_recipe_to_profile(&pf->hw, n, - (u8 *)&recipe_bits, NULL); + &recipe_bits, NULL); if (err) continue; @@ -2049,7 +2049,7 @@ int ice_init_lag(struct ice_pf *pf) recipe_bits |= BIT(lag->pf_recipe) | BIT(lag->lport_recipe); ice_aq_map_recipe_to_profile(&pf->hw, n, - (u8 *)&recipe_bits, NULL); + recipe_bits, NULL); } } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index ee3f0d3e3f6d..558422120312 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3091,7 +3091,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) { struct ice_vsi_cfg_params params = {}; struct ice_coalesce_stored *coalesce; - int prev_num_q_vectors = 0; + int prev_num_q_vectors; struct ice_pf *pf; int ret; @@ -3105,13 +3105,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf)) return -EINVAL; - coalesce = kcalloc(vsi->num_q_vectors, - sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (!coalesce) - return -ENOMEM; - - prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); - ret = ice_vsi_realloc_stat_arrays(vsi); if (ret) goto err_vsi_cfg; @@ -3121,6 +3114,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (ret) goto err_vsi_cfg; + coalesce = kcalloc(vsi->num_q_vectors, + sizeof(struct ice_coalesce_stored), GFP_KERNEL); + if (!coalesce) + return -ENOMEM; + + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + ret = ice_vsi_cfg_tc_lan(pf, vsi); if (ret) { if (vsi_flags & ICE_VSI_FLAG_INIT) { @@ -3139,8 +3139,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) err_vsi_cfg_tc_lan: ice_vsi_decfg(vsi); -err_vsi_cfg: kfree(coalesce); +err_vsi_cfg: return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index d4baae8c3b72..b4ea935e8300 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2025,12 +2025,12 @@ error_out: * ice_aq_map_recipe_to_profile - Map recipe to packet profile * @hw: pointer to the HW struct * @profile_id: package profile ID to associate the recipe with - * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @r_assoc: Recipe bitmap filled in and need to be returned as response * @cd: pointer to command details structure or NULL * Recipe to profile association (0x0291) */ int -ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, struct ice_sq_cd *cd) { struct ice_aqc_recipe_to_profile *cmd; @@ -2042,7 +2042,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, /* Set the recipe ID bit in the bitmask to let the device know which * profile we are associating the recipe to */ - memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc)); + cmd->recipe_assoc = cpu_to_le64(r_assoc); return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } @@ -2051,12 +2051,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, * ice_aq_get_recipe_to_profile - Map recipe to packet profile * @hw: pointer to the HW struct * @profile_id: package profile ID to associate the recipe with - * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @r_assoc: Recipe bitmap filled in and need to be returned as response * @cd: pointer to command details structure or NULL * Associate profile ID with given recipe (0x0293) */ int -ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, struct ice_sq_cd *cd) { struct ice_aqc_recipe_to_profile *cmd; @@ -2069,7 +2069,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); if (!status) - memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc)); + *r_assoc = le64_to_cpu(cmd->recipe_assoc); return status; } @@ -2108,6 +2108,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) static void ice_get_recp_to_prof_map(struct ice_hw *hw) { DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u64 recp_assoc; u16 i; for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) { @@ -2115,8 +2116,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw) bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES); bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES); - if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL)) + if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL)) continue; + bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); bitmap_copy(profile_to_recipe[i], r_bitmap, ICE_MAX_NUM_RECIPES); for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES) @@ -5390,22 +5392,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, */ list_for_each_entry(fvit, &rm->fv_list, list_entry) { DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u64 recp_assoc; u16 j; status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id, - (u8 *)r_bitmap, NULL); + &recp_assoc, NULL); if (status) goto err_unroll; + bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap, ICE_MAX_NUM_RECIPES); status = ice_acquire_change_lock(hw, ICE_RES_WRITE); if (status) goto err_unroll; + bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES); status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id, - (u8 *)r_bitmap, - NULL); + recp_assoc, NULL); ice_release_change_lock(hw); if (status) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index db7e501b7e0a..89ffa1b51b5a 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw, struct ice_aqc_recipe_data_elem *s_recipe_list, u16 num_recipes, struct ice_sq_cd *cd); int -ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, struct ice_sq_cd *cd); int -ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, struct ice_sq_cd *cd); #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index b890410a2bc0..688ccb0615ab 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -28,6 +28,8 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, * - ICE_TC_FLWR_FIELD_VLAN_TPID (present if specified) * - Tunnel flag (present if tunnel) */ + if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) + lkups_cnt++; if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) lkups_cnt++; @@ -363,6 +365,11 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, /* Always add direction metadata */ ice_rule_add_direction_metadata(&list[ICE_TC_METADATA_LKUP_IDX]); + if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { + ice_rule_add_src_vsi_metadata(&list[i]); + i++; + } + rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type); if (tc_fltr->tunnel_type != TNL_LAST) { i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i); @@ -772,7 +779,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) int ret; int i; - if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) { + if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)"); return -EOPNOTSUPP; } @@ -820,6 +827,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) /* specify the cookie as filter_rule_id */ rule_info.fltr_rule_id = fltr->cookie; + rule_info.src_vsi = vsi->idx; ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added); if (ret == -EEXIST) { @@ -1481,7 +1489,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | - BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) { + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL))) { NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel"); return -EOPNOTSUPP; } else { diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c index 80dc4bcdd3a4..b3e1bdcb80f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) struct ice_vsi_vlan_ops *vlan_ops; struct ice_pf *pf = vsi->back; + /* setup inner VLAN ops */ + vlan_ops = &vsi->inner_vlan_ops; + if (ice_is_dvm_ena(&pf->hw)) { - vlan_ops = &vsi->outer_vlan_ops; - - /* setup outer VLAN ops */ - vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; - vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; - - /* setup inner VLAN ops */ - vlan_ops = &vsi->inner_vlan_ops; vlan_ops->add_vlan = noop_vlan_arg; vlan_ops->del_vlan = noop_vlan_arg; vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping; vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping; vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; - } else { - vlan_ops = &vsi->inner_vlan_ops; + /* setup outer VLAN ops */ + vlan_ops = &vsi->outer_vlan_ops; + vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; + vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; + } else { vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 6dd7a66bb897..f5bc4a278074 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2941,6 +2941,8 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0, VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M); + skb->protocol = eth_type_trans(skb, rxq->vport->netdev); + decoded = rxq->vport->rx_ptype_lkup[rx_ptype]; /* If we don't know the ptype we can't do anything else with it. Just * pass it up the stack as-is. @@ -2951,8 +2953,6 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, /* process RSS/hash */ idpf_rx_hash(rxq, skb, rx_desc, &decoded); - skb->protocol = eth_type_trans(skb, rxq->vport->netdev); - if (le16_get_bits(rx_desc->hdrlen_flags, VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) return idpf_rx_rsc(rxq, skb, rx_desc, &decoded); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2e1cfbd82f4f..35ad40a803cb 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1642,10 +1642,6 @@ done: if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - /* FIXME: add support for retrieving timestamps from - * the other timer registers before skipping the - * timestamping request. - */ unsigned long flags; u32 tstamp_flags; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 13a6fca31004..866024f2b9ee 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) goto err_out; } - xs = kzalloc(sizeof(*xs), GFP_KERNEL); + algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); + if (unlikely(!algo)) { + err = -ENOENT; + goto err_out; + } + + xs = kzalloc(sizeof(*xs), GFP_ATOMIC); if (unlikely(!xs)) { err = -ENOMEM; goto err_out; @@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4)); xs->xso.dev = adapter->netdev; - algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); - if (unlikely(!algo)) { - err = -ENOENT; - goto err_xs; - } - aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8; - xs->aead = kzalloc(aead_len, GFP_KERNEL); + xs->aead = kzalloc(aead_len, GFP_ATOMIC); if (unlikely(!xs->aead)) { err = -ENOMEM; goto err_xs; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 3c0f55b3e48e..b86f3224f0b7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 72e060cf6b61..e9bf9231b018 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) continue; lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { + if (iter >= MAX_LMAC_COUNT) + continue; lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), iter); rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index d39001cdc707..00af8888e329 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4819,19 +4819,19 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) */ rvu_write64(rvu, blkaddr, NIX_AF_CFG, rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL); - - /* Set chan/link to backpressure TL3 instead of TL2 */ - rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); - - /* Disable SQ manager's sticky mode operation (set TM6 = 0) - * This sticky mode is known to cause SQ stalls when multiple - * SQs are mapped to same SMQ and transmitting pkts at a time. - */ - cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); - cfg &= ~BIT_ULL(15); - rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); } + /* Set chan/link to backpressure TL3 instead of TL2 */ + rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); + + /* Disable SQ manager's sticky mode operation (set TM6 = 0) + * This sticky mode is known to cause SQ stalls when multiple + * SQs are mapped to same SMQ and transmitting pkts at a time. + */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); + cfg &= ~BIT_ULL(15); + rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); + ltdefs = rvu->kpu.lt_def; /* Calibrate X2P bus to check if CGX/LBK links are fine */ err = nix_calibrate_x2p(rvu, blkaddr); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index e350242bbafb..be709f83f331 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1657,7 +1657,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz, struct npc_coalesced_kpu_prfl *img_data = NULL; int i = 0, rc = -EINVAL; void __iomem *kpu_prfl_addr; - u16 offset; + u32 offset; img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr; if (le64_to_cpu(img_data->signature) == KPU_SIGN && diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index b40bd0e46751..3f46d5e0fb2e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev) * mcam entries are enabled to receive the packets. Hence disable the * packet I/O. */ - if (err == EIO) + if (err == -EIO) goto err_disable_rxtx; else if (err) goto err_tx_stop_queues; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 87bdb93cb066..f4655a8c0705 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -689,6 +689,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_match_control match; + u32 val; flow_rule_match_control(rule, &match); if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { @@ -697,12 +698,14 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, } if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { + val = match.key->flags & FLOW_DIS_IS_FRAGMENT; if (ntohs(flow_spec->etype) == ETH_P_IP) { - flow_spec->ip_flag = IPV4_FLAG_MORE; + flow_spec->ip_flag = val ? IPV4_FLAG_MORE : 0; flow_mask->ip_flag = IPV4_FLAG_MORE; req->features |= BIT_ULL(NPC_IPFRAG_IPV4); } else if (ntohs(flow_spec->etype) == ETH_P_IPV6) { - flow_spec->next_header = IPPROTO_FRAGMENT; + flow_spec->next_header = val ? + IPPROTO_FRAGMENT : 0; flow_mask->next_header = 0xff; req->features |= BIT_ULL(NPC_IPFRAG_IPV6); } else { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c index 1e77bbf5d22a..1723e9912ae0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -382,6 +382,7 @@ static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent, otx2_qos_read_txschq_cfg_tl(node, cfg); cnt = cfg->static_node_pos[node->level]; cfg->schq_contig_list[node->level][cnt] = node->schq; + cfg->schq_index_used[node->level][cnt] = true; cfg->schq_contig[node->level]++; cfg->static_node_pos[node->level]++; otx2_qos_read_txschq_cfg_schq(node, cfg); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index c895e265ae0e..61334a71058c 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -1074,13 +1074,13 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev) static void mtk_wed_stop(struct mtk_wed_device *dev) { + mtk_wed_dma_disable(dev); mtk_wed_set_ext_int(dev, false); wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0); wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0); wdma_w32(dev, MTK_WDMA_INT_MASK, 0); wdma_w32(dev, MTK_WDMA_INT_GRP2, 0); - wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0); if (!mtk_wed_get_rx_capa(dev)) return; @@ -1093,7 +1093,6 @@ static void mtk_wed_deinit(struct mtk_wed_device *dev) { mtk_wed_stop(dev); - mtk_wed_dma_disable(dev); wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WDMA_INT_AGENT_EN | @@ -2605,9 +2604,6 @@ mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask) static void mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask) { - if (!dev->running) - return; - mtk_wed_set_ext_int(dev, !!mask); wed_w32(dev, MTK_WED_INT_MASK, mask); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index 86f1854698b4..883c044852f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -95,9 +95,15 @@ static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo * } static inline u8 +mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo) +{ + return fifo->data[fifo->mask & fifo->cc]; +} + +static inline void mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo) { - return fifo->data[fifo->mask & fifo->cc++]; + fifo->cc++; } static inline void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index e87e26f2c669..6743806b8480 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -83,24 +83,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, txq_ix = mlx5e_qid_from_qos(chs, node_qid); - WARN_ON(node_qid > priv->htb_max_qos_sqs); - if (node_qid == priv->htb_max_qos_sqs) { - struct mlx5e_sq_stats *stats, **stats_list = NULL; + WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb)); + if (!priv->htb_qos_sq_stats) { + struct mlx5e_sq_stats **stats_list; - if (priv->htb_max_qos_sqs == 0) { - stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), - sizeof(*stats_list), - GFP_KERNEL); - if (!stats_list) - return -ENOMEM; - } - stats = kzalloc(sizeof(*stats), GFP_KERNEL); - if (!stats) { - kvfree(stats_list); + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), GFP_KERNEL); + if (!stats_list) return -ENOMEM; - } - if (stats_list) - WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + + WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + } + + if (!priv->htb_qos_sq_stats[node_qid]) { + struct mlx5e_sq_stats *stats; + + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) + return -ENOMEM; + WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats); /* Order htb_max_qos_sqs increment after writing the array pointer. * Pairs with smp_load_acquire in en_stats.c. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 0ab9db319530..22918b2ef7f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -108,7 +108,10 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) mlx5e_reset_txqsq_cc_pc(sq); sq->stats->recover++; clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + rtnl_lock(); mlx5e_activate_txqsq(sq); + rtnl_unlock(); + if (sq->channel) mlx5e_trigger_napi_icosq(sq->channel); else @@ -179,12 +182,16 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); + rtnl_lock(); mlx5e_deactivate_priv_channels(priv); + rtnl_unlock(); mlx5e_ptp_close(chs->ptp); err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); + rtnl_lock(); mlx5e_activate_priv_channels(priv); + rtnl_unlock(); /* return carrier back if needed */ if (carrier_ok) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c index bcafb4bf9415..8d9a3b5ec973 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c @@ -179,6 +179,13 @@ u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels) return min_t(u32, rqt_size, max_cap_rqt_size); } +#define MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH 256 + +unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void) +{ + return MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH / MLX5E_UNIFORM_SPREAD_RQT_FACTOR; +} + void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt) { mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h index e0bc30308c77..2f9e04a8418f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h @@ -38,6 +38,7 @@ static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt) } u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels); +unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void); int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id); int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c index f675b1926340..f66bbc846464 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) void mlx5e_selq_cleanup(struct mlx5e_selq *selq) { + mutex_lock(selq->state_lock); WARN_ON_ONCE(selq->is_prepared); kvfree(selq->standby); @@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq) kvfree(selq->standby); selq->standby = NULL; + mutex_unlock(selq->state_lock); } void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index c7f542d0b8f0..93cf23278d93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -46,6 +46,10 @@ struct arfs_table { struct hlist_head rules_hash[ARFS_HASH_SIZE]; }; +enum { + MLX5E_ARFS_STATE_ENABLED, +}; + enum arfs_type { ARFS_IPV4_TCP, ARFS_IPV6_TCP, @@ -60,6 +64,7 @@ struct mlx5e_arfs_tables { spinlock_t arfs_lock; int last_filter_id; struct workqueue_struct *wq; + unsigned long state; }; struct arfs_tuple { @@ -170,6 +175,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) return err; } } + set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); + return 0; } @@ -455,6 +462,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs) int i; int j; + clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); + spin_lock_bh(&arfs->arfs_lock); mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) { hlist_del_init(&rule->hlist); @@ -627,17 +636,8 @@ static void arfs_handle_work(struct work_struct *work) struct mlx5_flow_handle *rule; arfs = mlx5e_fs_get_arfs(priv->fs); - mutex_lock(&priv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - spin_lock_bh(&arfs->arfs_lock); - hlist_del(&arfs_rule->hlist); - spin_unlock_bh(&arfs->arfs_lock); - - mutex_unlock(&priv->state_lock); - kfree(arfs_rule); - goto out; - } - mutex_unlock(&priv->state_lock); + if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) + return; if (!arfs_rule->rule) { rule = arfs_add_rule(priv, arfs_rule); @@ -753,6 +753,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, return -EPROTONOSUPPORT; spin_lock_bh(&arfs->arfs_lock); + if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) { + spin_unlock_bh(&arfs->arfs_lock); + return -EPERM; + } + arfs_rule = arfs_find_rule(arfs_t, &fk); if (arfs_rule) { if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index cc51ce16df14..67a29826bb57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -451,6 +451,34 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); + if (mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc == ETH_RSS_HASH_XOR) { + unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8(); + + if (count > xor8_max_channels) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: Requested number of channels (%d) exceeds the maximum allowed by the XOR8 RSS hfunc (%d)\n", + __func__, count, xor8_max_channels); + goto out; + } + } + + /* If RXFH is configured, changing the channels number is allowed only if + * it does not require resizing the RSS table. This is because the previous + * configuration may no longer be compatible with the new RSS table. + */ + if (netif_is_rxfh_configured(priv->netdev)) { + int cur_rqt_size = mlx5e_rqt_size(priv->mdev, cur_params->num_channels); + int new_rqt_size = mlx5e_rqt_size(priv->mdev, count); + + if (new_rqt_size != cur_rqt_size) { + err = -EINVAL; + netdev_err(priv->netdev, + "%s: RXFH is configured, block changing channels number that affects RSS table size (new: %d, current: %d)\n", + __func__, new_rqt_size, cur_rqt_size); + goto out; + } + } + /* Don't allow changing the number of channels if HTB offload is active, * because the numeration of the QoS SQs will change, while per-queue * qdiscs are attached. @@ -561,12 +589,12 @@ static int mlx5e_get_coalesce(struct net_device *netdev, static void mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct mlx5_core_dev *mdev = priv->mdev; int tc; int i; for (i = 0; i < priv->channels.num; ++i) { struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5_core_dev *mdev = c->mdev; for (tc = 0; tc < c->num_tc; tc++) { mlx5_core_modify_cq_moderation(mdev, @@ -580,11 +608,11 @@ mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal static void mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct mlx5_core_dev *mdev = priv->mdev; int i; for (i = 0; i < priv->channels.num; ++i) { struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5_core_dev *mdev = c->mdev; mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, coal->rx_coalesce_usecs, @@ -1281,17 +1309,30 @@ int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, struct mlx5e_priv *priv = netdev_priv(dev); u32 *rss_context = &rxfh->rss_context; u8 hfunc = rxfh->hfunc; + unsigned int count; int err; mutex_lock(&priv->state_lock); + + count = priv->channels.params.num_channels; + + if (hfunc == ETH_RSS_HASH_XOR) { + unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8(); + + if (count > xor8_max_channels) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n", + __func__, count, xor8_max_channels); + goto unlock; + } + } + if (*rss_context && rxfh->rss_delete) { err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context); goto unlock; } if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - unsigned int count = priv->channels.params.num_channels; - err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count); if (err) goto unlock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 91848eae4565..319930c04093 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -209,8 +209,8 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) *data, mlx5e_devcom_event_mpv, priv); - if (IS_ERR_OR_NULL(priv->devcom)) - return -EOPNOTSUPP; + if (IS_ERR(priv->devcom)) + return PTR_ERR(priv->devcom); if (mlx5_core_is_mp_master(priv->mdev)) { mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP, @@ -5726,9 +5726,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) kfree(priv->tx_rates); kfree(priv->txq2sq); destroy_workqueue(priv->wq); - mutex_lock(&priv->state_lock); mlx5e_selq_cleanup(&priv->selq); - mutex_unlock(&priv->state_lock); free_cpumask_var(priv->scratchpad.cpumask); for (i = 0; i < priv->htb_max_qos_sqs; i++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 2fa076b23fbe..e21a3b4128ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -398,6 +398,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) { u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata); + mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist); + mlx5e_skb_cb_hwtstamp_init(skb); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); @@ -496,9 +498,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; - if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) - mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist, - be32_to_cpu(eseg->flow_table_metadata)); dev_kfree_skb_any(skb); mlx5e_tx_flush(sq); } @@ -657,7 +656,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb, { if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) eseg->flow_table_metadata = - cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist)); + cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist)); } static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 3047d7015c52..1789800faaeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1868,6 +1868,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (err) goto abort; + dev->priv.eswitch = esw; err = esw_offloads_init(esw); if (err) goto reps_err; @@ -1892,11 +1893,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; else esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (MLX5_ESWITCH_MANAGER(dev) && - mlx5_esw_vport_match_metadata_supported(esw)) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - - dev->priv.eswitch = esw; BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); esw_info(dev, @@ -1908,6 +1904,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) reps_err: mlx5_esw_vports_cleanup(esw); + dev->priv.eswitch = NULL; abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); @@ -1926,7 +1923,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_info(esw->dev, "cleanup\n"); - esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); WARN_ON(refcount_read(&esw->qos.refcnt)); mutex_destroy(&esw->state_lock); @@ -1937,6 +1933,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mutex_destroy(&esw->offloads.encap_tbl_lock); mutex_destroy(&esw->offloads.decap_tbl_lock); esw_offloads_cleanup(esw); + esw->dev->priv.eswitch = NULL; mlx5_esw_vports_cleanup(esw); debugfs_remove_recursive(esw->debugfs_root); devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index baaae628b0a0..844d3e3a65dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -43,6 +43,7 @@ #include "rdma.h" #include "en.h" #include "fs_core.h" +#include "lib/mlx5.h" #include "lib/devcom.h" #include "lib/eq.h" #include "lib/fs_chains.h" @@ -2476,6 +2477,10 @@ int esw_offloads_init(struct mlx5_eswitch *esw) if (err) return err; + if (MLX5_ESWITCH_MANAGER(esw->dev) && + mlx5_esw_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + err = devl_params_register(priv_to_devlink(esw->dev), esw_devlink_params, ARRAY_SIZE(esw_devlink_params)); @@ -3055,7 +3060,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) key, mlx5_esw_offloads_devcom_event, esw); - if (IS_ERR_OR_NULL(esw->devcom)) + if (IS_ERR(esw->devcom)) return; mlx5_devcom_send_event(esw->devcom, @@ -3707,6 +3712,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured."); + return -EPERM; + } + mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e6bfa7e4f146..cf085a478e3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1664,6 +1664,16 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft, return err; } +static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1, + struct mlx5_pkt_reformat *p2) +{ + return p1->owner == p2->owner && + (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ? + p1->id == p2->id : + mlx5_fs_dr_action_get_pkt_reformat_id(p1) == + mlx5_fs_dr_action_get_pkt_reformat_id(p2)); +} + static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2) { @@ -1675,8 +1685,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? (d1->vport.vhca_id == d2->vport.vhca_id) : true) && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? - (d1->vport.pkt_reformat->id == - d2->vport.pkt_reformat->id) : true)) || + mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat, + d2->vport.pkt_reformat) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && @@ -1808,8 +1818,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } trace_mlx5_fs_set_fte(fte, false); + /* Link newly added rules into the tree. */ for (i = 0; i < handle->num_rules; i++) { - if (refcount_read(&handle->rule[i]->node.refcount) == 1) { + if (!handle->rule[i]->node.parent) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index d14459e5c04f..69d482f7c5a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -703,8 +703,10 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) return err; } - if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { mlx5_lag_port_sel_destroy(ldev); + ldev->buckets = 1; + } if (mlx5_lag_has_drop_rule(ldev)) mlx5_lag_drop_rule_cleanup(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index e7d59cfa8708..7b0766c89f4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -220,7 +220,7 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, struct mlx5_devcom_comp *comp; if (IS_ERR_OR_NULL(devc)) - return NULL; + return ERR_PTR(-EINVAL); mutex_lock(&comp_list_lock); comp = devcom_component_get(devc, id, key, handler); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index 5b28084e8a03..dd5d186dc614 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -213,8 +213,8 @@ static int sd_register(struct mlx5_core_dev *dev) sd = mlx5_get_sd(dev); devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP, sd->group_id, NULL, dev); - if (!devcom) - return -ENOMEM; + if (IS_ERR(devcom)) + return PTR_ERR(devcom); sd->devcom = devcom; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c2593625c09a..331ce47f51a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -956,7 +956,7 @@ static void mlx5_register_hca_devcom_comp(struct mlx5_core_dev *dev) mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS, mlx5_query_nic_system_image_guid(dev), NULL, dev); - if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp)) + if (IS_ERR(dev->priv.hca_devcom_comp)) mlx5_core_err(dev, "Failed to register devcom HCA component\n"); } @@ -1480,6 +1480,14 @@ int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev) if (err) goto err_register; + err = mlx5_crdump_enable(dev); + if (err) + mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err); + + err = mlx5_hwmon_dev_register(dev); + if (err) + mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err); + mutex_unlock(&dev->intf_state_mutex); return 0; @@ -1505,7 +1513,10 @@ int mlx5_init_one(struct mlx5_core_dev *dev) int err; devl_lock(devlink); + devl_register(devlink); err = mlx5_init_one_devl_locked(dev); + if (err) + devl_unregister(devlink); devl_unlock(devlink); return err; } @@ -1517,6 +1528,8 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); + mlx5_hwmon_dev_unregister(dev); + mlx5_crdump_disable(dev); mlx5_unregister_device(dev); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1534,6 +1547,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) mlx5_function_teardown(dev, true); out: mutex_unlock(&dev->intf_state_mutex); + devl_unregister(devlink); devl_unlock(devlink); } @@ -1680,16 +1694,23 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev) } devl_lock(devlink); + devl_register(devlink); + err = mlx5_devlink_params_register(priv_to_devlink(dev)); - devl_unlock(devlink); if (err) { mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); - goto query_hca_caps_err; + goto params_reg_err; } + devl_unlock(devlink); return 0; +params_reg_err: + devl_unregister(devlink); + devl_unlock(devlink); query_hca_caps_err: + devl_unregister(devlink); + devl_unlock(devlink); mlx5_function_disable(dev, true); out: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; @@ -1702,6 +1723,7 @@ void mlx5_uninit_one_light(struct mlx5_core_dev *dev) devl_lock(devlink); mlx5_devlink_params_unregister(priv_to_devlink(dev)); + devl_unregister(devlink); devl_unlock(devlink); if (dev->state != MLX5_DEVICE_STATE_UP) return; @@ -1943,16 +1965,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) goto err_init_one; } - err = mlx5_crdump_enable(dev); - if (err) - dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); - - err = mlx5_hwmon_dev_register(dev); - if (err) - mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err); - pci_save_state(pdev); - devlink_register(devlink); return 0; err_init_one: @@ -1973,16 +1986,9 @@ static void remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(dev); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); - /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using - * devlink notify APIs. - * Hence, we must drain them before unregistering the devlink. - */ mlx5_drain_fw_reset(dev); mlx5_drain_health_wq(dev); - devlink_unregister(devlink); mlx5_sriov_disable(pdev, false); - mlx5_hwmon_dev_unregister(dev); - mlx5_crdump_disable(dev); mlx5_uninit_one(dev); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 4dcf995cb1a2..6bac8ad70ba6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -19,6 +19,7 @@ #define MLX5_IRQ_CTRL_SF_MAX 8 /* min num of vectors for SFs to be enabled */ #define MLX5_IRQ_VEC_COMP_BASE_SF 2 +#define MLX5_IRQ_VEC_COMP_BASE 1 #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8) #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX) @@ -246,6 +247,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) return; } + vecidx -= MLX5_IRQ_VEC_COMP_BASE; snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx); } @@ -585,7 +587,7 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, struct mlx5_irq_table *table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = table->pcif_pool; struct irq_affinity_desc af_desc; - int offset = 1; + int offset = MLX5_IRQ_VEC_COMP_BASE; if (!pool->xa_num_irqs.max) offset = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index bc863e1f062e..7ebe71280827 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -75,7 +75,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto peer_devlink_set_err; } - devlink_register(devlink); return 0; peer_devlink_set_err: @@ -101,7 +100,6 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev) devlink = priv_to_devlink(mdev); set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state); mlx5_drain_health_wq(mdev); - devlink_unregister(devlink); if (mlx5_dev_is_lightweight(mdev)) mlx5_uninit_one_light(mdev); else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index 64f4cc284aea..030a5776c937 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -205,12 +205,11 @@ dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size) } static int -dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, +dr_dump_rule_action_mem(struct seq_file *file, char *buff, const u64 rule_id, struct mlx5dr_rule_action_member *action_mem) { struct mlx5dr_action *action = action_mem->action; const u64 action_id = DR_DBG_PTR_TO_ID(action); - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; u64 hit_tbl_ptr, miss_tbl_ptr; u32 hit_tbl_id, miss_tbl_id; int ret; @@ -488,10 +487,9 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, } static int -dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste, +dr_dump_rule_mem(struct seq_file *file, char *buff, struct mlx5dr_ste *ste, bool is_rx, const u64 rule_id, u8 format_ver) { - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; char hw_ste_dump[DR_HEX_SIZE]; u32 mem_rec_type; int ret; @@ -522,7 +520,8 @@ dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste, } static int -dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx, +dr_dump_rule_rx_tx(struct seq_file *file, char *buff, + struct mlx5dr_rule_rx_tx *rule_rx_tx, bool is_rx, const u64 rule_id, u8 format_ver) { struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES]; @@ -533,7 +532,7 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx, return 0; while (i--) { - ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id, + ret = dr_dump_rule_mem(file, buff, ste_arr[i], is_rx, rule_id, format_ver); if (ret < 0) return ret; @@ -542,7 +541,8 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx, return 0; } -static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule) +static noinline_for_stack int +dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule) { struct mlx5dr_rule_action_member *action_mem; const u64 rule_id = DR_DBG_PTR_TO_ID(rule); @@ -565,19 +565,19 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule) return ret; if (rx->nic_matcher) { - ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver); + ret = dr_dump_rule_rx_tx(file, buff, rx, true, rule_id, format_ver); if (ret < 0) return ret; } if (tx->nic_matcher) { - ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver); + ret = dr_dump_rule_rx_tx(file, buff, tx, false, rule_id, format_ver); if (ret < 0) return ret; } list_for_each_entry(action_mem, &rule->rule_actions_list, list) { - ret = dr_dump_rule_action_mem(file, rule_id, action_mem); + ret = dr_dump_rule_action_mem(file, buff, rule_id, action_mem); if (ret < 0) return ret; } @@ -586,10 +586,10 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule) } static int -dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask, +dr_dump_matcher_mask(struct seq_file *file, char *buff, + struct mlx5dr_match_param *mask, u8 criteria, const u64 matcher_id) { - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; char dump[DR_HEX_SIZE]; int ret; @@ -681,10 +681,10 @@ dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask, } static int -dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder, +dr_dump_matcher_builder(struct seq_file *file, char *buff, + struct mlx5dr_ste_build *builder, u32 index, bool is_rx, const u64 matcher_id) { - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; int ret; ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, @@ -702,11 +702,10 @@ dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder, } static int -dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx, +dr_dump_matcher_rx_tx(struct seq_file *file, char *buff, bool is_rx, struct mlx5dr_matcher_rx_tx *matcher_rx_tx, const u64 matcher_id) { - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; enum dr_dump_rec_type rec_type; u64 s_icm_addr, e_icm_addr; int i, ret; @@ -731,7 +730,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx, return ret; for (i = 0; i < matcher_rx_tx->num_of_builders; i++) { - ret = dr_dump_matcher_builder(file, + ret = dr_dump_matcher_builder(file, buff, &matcher_rx_tx->ste_builder[i], i, is_rx, matcher_id); if (ret < 0) @@ -741,7 +740,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx, return 0; } -static int +static noinline_for_stack int dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher) { struct mlx5dr_matcher_rx_tx *rx = &matcher->rx; @@ -763,19 +762,19 @@ dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher) if (ret) return ret; - ret = dr_dump_matcher_mask(file, &matcher->mask, + ret = dr_dump_matcher_mask(file, buff, &matcher->mask, matcher->match_criteria, matcher_id); if (ret < 0) return ret; if (rx->nic_tbl) { - ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id); + ret = dr_dump_matcher_rx_tx(file, buff, true, rx, matcher_id); if (ret < 0) return ret; } if (tx->nic_tbl) { - ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id); + ret = dr_dump_matcher_rx_tx(file, buff, false, tx, matcher_id); if (ret < 0) return ret; } @@ -803,11 +802,10 @@ dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher) } static int -dr_dump_table_rx_tx(struct seq_file *file, bool is_rx, +dr_dump_table_rx_tx(struct seq_file *file, char *buff, bool is_rx, struct mlx5dr_table_rx_tx *table_rx_tx, const u64 table_id) { - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; enum dr_dump_rec_type rec_type; u64 s_icm_addr; int ret; @@ -829,7 +827,8 @@ dr_dump_table_rx_tx(struct seq_file *file, bool is_rx, return 0; } -static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table) +static noinline_for_stack int +dr_dump_table(struct seq_file *file, struct mlx5dr_table *table) { struct mlx5dr_table_rx_tx *rx = &table->rx; struct mlx5dr_table_rx_tx *tx = &table->tx; @@ -848,14 +847,14 @@ static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table) return ret; if (rx->nic_dmn) { - ret = dr_dump_table_rx_tx(file, true, rx, + ret = dr_dump_table_rx_tx(file, buff, true, rx, DR_DBG_PTR_TO_ID(table)); if (ret < 0) return ret; } if (tx->nic_dmn) { - ret = dr_dump_table_rx_tx(file, false, tx, + ret = dr_dump_table_rx_tx(file, buff, false, tx, DR_DBG_PTR_TO_ID(table)); if (ret < 0) return ret; @@ -881,10 +880,10 @@ static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl) } static int -dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring, +dr_dump_send_ring(struct seq_file *file, char *buff, + struct mlx5dr_send_ring *ring, const u64 domain_id) { - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; int ret; ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, @@ -902,13 +901,13 @@ dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring, return 0; } -static noinline_for_stack int +static int dr_dump_domain_info_flex_parser(struct seq_file *file, + char *buff, const char *flex_parser_name, const u8 flex_parser_value, const u64 domain_id) { - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; int ret; ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH, @@ -925,11 +924,11 @@ dr_dump_domain_info_flex_parser(struct seq_file *file, return 0; } -static noinline_for_stack int -dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps, +static int +dr_dump_domain_info_caps(struct seq_file *file, char *buff, + struct mlx5dr_cmd_caps *caps, const u64 domain_id) { - char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH]; struct mlx5dr_cmd_vport_cap *vport_caps; unsigned long i, vports_num; int ret; @@ -969,34 +968,35 @@ dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps, } static int -dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info, +dr_dump_domain_info(struct seq_file *file, char *buff, + struct mlx5dr_domain_info *info, const u64 domain_id) { int ret; - ret = dr_dump_domain_info_caps(file, &info->caps, domain_id); + ret = dr_dump_domain_info_caps(file, buff, &info->caps, domain_id); if (ret < 0) return ret; - ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0", + ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw0", info->caps.flex_parser_id_icmp_dw0, domain_id); if (ret < 0) return ret; - ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1", + ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw1", info->caps.flex_parser_id_icmp_dw1, domain_id); if (ret < 0) return ret; - ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0", + ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw0", info->caps.flex_parser_id_icmpv6_dw0, domain_id); if (ret < 0) return ret; - ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1", + ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw1", info->caps.flex_parser_id_icmpv6_dw1, domain_id); if (ret < 0) @@ -1032,12 +1032,12 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) if (ret) return ret; - ret = dr_dump_domain_info(file, &dmn->info, domain_id); + ret = dr_dump_domain_info(file, buff, &dmn->info, domain_id); if (ret < 0) return ret; if (dmn->info.supp_sw_steering) { - ret = dr_dump_send_ring(file, dmn->send_ring, domain_id); + ret = dr_dump_send_ring(file, buff, dmn->send_ring, domain_id); if (ret < 0) return ret; } diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 3d09fa54598f..ba303868686a 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "mlxbf_gige.h" @@ -139,13 +140,10 @@ static int mlxbf_gige_open(struct net_device *netdev) control |= MLXBF_GIGE_CONTROL_PORT_EN; writeq(control, priv->base + MLXBF_GIGE_CONTROL); - err = mlxbf_gige_request_irqs(priv); - if (err) - return err; mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); if (err) - goto free_irqs; + return err; /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -157,7 +155,7 @@ static int mlxbf_gige_open(struct net_device *netdev) err = mlxbf_gige_tx_init(priv); if (err) - goto free_irqs; + goto phy_deinit; err = mlxbf_gige_rx_init(priv); if (err) goto tx_deinit; @@ -166,6 +164,10 @@ static int mlxbf_gige_open(struct net_device *netdev) napi_enable(&priv->napi); netif_start_queue(netdev); + err = mlxbf_gige_request_irqs(priv); + if (err) + goto napi_deinit; + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -182,11 +184,17 @@ static int mlxbf_gige_open(struct net_device *netdev) return 0; +napi_deinit: + netif_stop_queue(netdev); + napi_disable(&priv->napi); + netif_napi_del(&priv->napi); + mlxbf_gige_rx_deinit(priv); + tx_deinit: mlxbf_gige_tx_deinit(priv); -free_irqs: - mlxbf_gige_free_irqs(priv); +phy_deinit: + phy_stop(phydev); return err; } @@ -485,8 +493,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev) { struct mlxbf_gige *priv = platform_get_drvdata(pdev); - writeq(0, priv->base + MLXBF_GIGE_INT_EN); - mlxbf_gige_clean_port(priv); + rtnl_lock(); + netif_device_detach(priv->netdev); + + if (netif_running(priv->netdev)) + dev_close(priv->netdev); + + rtnl_unlock(); } static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index e5ec0a363aff..31f75b4a67fd 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -368,7 +368,6 @@ union ks8851_tx_hdr { * @rdfifo: FIFO read callback * @wrfifo: FIFO write callback * @start_xmit: start_xmit() implementation callback - * @rx_skb: rx_skb() implementation callback * @flush_tx_work: flush_tx_work() implementation callback * * The @statelock is used to protect information in the structure which may @@ -423,8 +422,6 @@ struct ks8851_net { struct sk_buff *txp, bool irq); netdev_tx_t (*start_xmit)(struct sk_buff *skb, struct net_device *dev); - void (*rx_skb)(struct ks8851_net *ks, - struct sk_buff *skb); void (*flush_tx_work)(struct ks8851_net *ks); }; diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 0bf13b38b8f5..d4cdf3d4f552 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -231,16 +231,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt) rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]); } -/** - * ks8851_rx_skb - receive skbuff - * @ks: The device state. - * @skb: The skbuff - */ -static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb) -{ - ks->rx_skb(ks, skb); -} - /** * ks8851_rx_pkts - receive packets from the host * @ks: The device information. @@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) ks8851_dbg_dumpkkt(ks, rxpkt); skb->protocol = eth_type_trans(skb, ks->netdev); - ks8851_rx_skb(ks, skb); + __netif_rx(skb); ks->netdev->stats.rx_packets++; ks->netdev->stats.rx_bytes += rxlen; @@ -340,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) unsigned long flags; unsigned int status; + local_bh_disable(); + ks8851_lock(ks, &flags); status = ks8851_rdreg16(ks, KS_ISR); @@ -416,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); + local_bh_enable(); + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 2a7f29854267..381b9cd285eb 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp, iowrite16_rep(ksp->hw_addr, txp->data, len / 2); } -/** - * ks8851_rx_skb_par - receive skbuff - * @ks: The device state. - * @skb: The skbuff - */ -static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb) -{ - netif_rx(skb); -} - static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks) { return ks8851_rdreg16_par(ks, KS_TXQCR); @@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev) ks->rdfifo = ks8851_rdfifo_par; ks->wrfifo = ks8851_wrfifo_par; ks->start_xmit = ks8851_start_xmit_par; - ks->rx_skb = ks8851_rx_skb_par; #define STD_IRQ (IRQ_LCI | /* Link Change */ \ IRQ_RXI | /* RX done */ \ diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 2f803377c9f9..670c1de966db 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -298,16 +298,6 @@ static unsigned int calc_txlen(unsigned int len) return ALIGN(len + 4, 4); } -/** - * ks8851_rx_skb_spi - receive skbuff - * @ks: The device state - * @skb: The skbuff - */ -static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) -{ - netif_rx(skb); -} - /** * ks8851_tx_work - process tx packet(s) * @work: The work strucutre what was scheduled. @@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi) ks->rdfifo = ks8851_rdfifo_spi; ks->wrfifo = ks8851_wrfifo_spi; ks->start_xmit = ks8851_start_xmit_spi; - ks->rx_skb = ks8851_rx_skb_spi; ks->flush_tx_work = ks8851_flush_tx_work_spi; #define STD_IRQ (IRQ_LCI | /* Link Change */ \ diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index bd8aa83b47e5..75a988c0bd79 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -25,6 +25,8 @@ #define PCS_POWER_STATE_DOWN 0x6 #define PCS_POWER_STATE_UP 0x4 +#define RFE_RD_FIFO_TH_3_DWORDS 0x3 + static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) { u32 chip_rev; @@ -3272,6 +3274,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter) lan743x_pci_cleanup(adapter); } +static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter) +{ + u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_; + + if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) { + u32 misc_ctl; + + misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0); + misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_; + misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_, + RFE_RD_FIFO_TH_3_DWORDS); + lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl); + } +} + static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { @@ -3287,6 +3304,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, pci11x1x_strap_get_status(adapter); spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); + pci11x1x_set_rfe_rd_fifo_threshold(adapter); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index be79cb0ae5af..645bc048e52e 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -26,6 +26,7 @@ #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) #define ID_REV_CHIP_REV_A0_ (0x00000000) #define ID_REV_CHIP_REV_B0_ (0x00000010) +#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0) #define FPGA_REV (0x04) #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF) @@ -311,6 +312,9 @@ #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) #define SGMII_CTL_SGMII_POWER_DN_ BIT(1) +#define MISC_CTL_0 (0x920) +#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4) + /* Vendor Specific SGMII MMD details */ #define SR_VSMMD_PCS_ID1 0x0004 #define SR_VSMMD_PCS_ID2 0x0005 diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 3a1b1a1f5a19..60dd2fd603a8 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -731,7 +731,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5, bool sgmii = false, inband_aneg = false; int err; - if (port->conf.inband) { + if (conf->inband) { if (conf->portmode == PHY_INTERFACE_MODE_SGMII || conf->portmode == PHY_INTERFACE_MODE_QSGMII) inband_aneg = true; /* Cisco-SGMII in-band-aneg */ @@ -948,7 +948,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5, if (err) return -EINVAL; - if (port->conf.inband) { + if (conf->inband) { /* Enable/disable 1G counters in ASM */ spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev), ASM_PORT_CFG_CSC_STAT_DIS, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index 523e0c470894..55f255a3c9db 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -36,6 +36,27 @@ struct sparx5_tc_flower_template { u16 l3_proto; /* protocol specified in the template */ }; +/* SparX-5 VCAP fragment types: + * 0 = no fragment, 1 = initial fragment, + * 2 = suspicious fragment, 3 = valid follow-up fragment + */ +enum { /* key / mask */ + FRAG_NOT = 0x03, /* 0 / 3 */ + FRAG_SOME = 0x11, /* 1 / 1 */ + FRAG_FIRST = 0x13, /* 1 / 3 */ + FRAG_LATER = 0x33, /* 3 / 3 */ + FRAG_INVAL = 0xff, /* invalid */ +}; + +/* Flower fragment flag to VCAP fragment type mapping */ +static const u8 sparx5_vcap_frag_map[4][4] = { /* is_frag */ + { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_FIRST }, /* 0/0 */ + { FRAG_NOT, FRAG_NOT, FRAG_INVAL, FRAG_INVAL }, /* 0/1 */ + { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_INVAL }, /* 1/0 */ + { FRAG_SOME, FRAG_LATER, FRAG_INVAL, FRAG_FIRST } /* 1/1 */ + /* 0/0 0/1 1/0 1/1 <-- first_frag */ +}; + static int sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st) { @@ -145,29 +166,27 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) flow_rule_match_control(st->frule, &mt); if (mt.mask->flags) { - if (mt.mask->flags & FLOW_DIS_FIRST_FRAG) { - if (mt.key->flags & FLOW_DIS_FIRST_FRAG) { - value = 1; /* initial fragment */ - mask = 0x3; - } else { - if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) { - value = 3; /* follow up fragment */ - mask = 0x3; - } else { - value = 0; /* no fragment */ - mask = 0x3; - } - } - } else { - if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) { - value = 3; /* follow up fragment */ - mask = 0x3; - } else { - value = 0; /* no fragment */ - mask = 0x3; - } + u8 is_frag_key = !!(mt.key->flags & FLOW_DIS_IS_FRAGMENT); + u8 is_frag_mask = !!(mt.mask->flags & FLOW_DIS_IS_FRAGMENT); + u8 is_frag_idx = (is_frag_key << 1) | is_frag_mask; + + u8 first_frag_key = !!(mt.key->flags & FLOW_DIS_FIRST_FRAG); + u8 first_frag_mask = !!(mt.mask->flags & FLOW_DIS_FIRST_FRAG); + u8 first_frag_idx = (first_frag_key << 1) | first_frag_mask; + + /* Lookup verdict based on the 2 + 2 input bits */ + u8 vdt = sparx5_vcap_frag_map[is_frag_idx][first_frag_idx]; + + if (vdt == FRAG_INVAL) { + NL_SET_ERR_MSG_MOD(st->fco->common.extack, + "Match on invalid fragment flag combination"); + return -EINVAL; } + /* Extract VCAP fragment key and mask from verdict */ + value = (vdt >> 4) & 0x3; + mask = vdt & 0x3; + err = vcap_rule_add_key_u32(st->vrule, VCAP_KF_L3_FRAGMENT_TYPE, value, mask); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 59287c6e6cee..d8af5e7e15b4 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; - *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); + *datasize = mtu + ETH_HLEN; } static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index 4c043052198d..00882ffc7a02 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -73,6 +73,7 @@ enum mac_version { }; struct rtl8169_private; +struct r8169_led_classdev; void r8169_apply_firmware(struct rtl8169_private *tp); u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp); @@ -84,7 +85,8 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx, char *buf, int buf_len); int rtl8168_get_led_mode(struct rtl8169_private *tp); int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val); -void rtl8168_init_leds(struct net_device *ndev); +struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev); int rtl8125_get_led_mode(struct rtl8169_private *tp, int index); int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode); -void rtl8125_init_leds(struct net_device *ndev); +struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev); +void r8169_remove_leds(struct r8169_led_classdev *leds); diff --git a/drivers/net/ethernet/realtek/r8169_leds.c b/drivers/net/ethernet/realtek/r8169_leds.c index 7c5dc9d0df85..e10bee706bc6 100644 --- a/drivers/net/ethernet/realtek/r8169_leds.c +++ b/drivers/net/ethernet/realtek/r8169_leds.c @@ -146,22 +146,22 @@ static void rtl8168_setup_ldev(struct r8169_led_classdev *ldev, led_cdev->hw_control_get_device = r8169_led_hw_control_get_device; /* ignore errors */ - devm_led_classdev_register(&ndev->dev, led_cdev); + led_classdev_register(&ndev->dev, led_cdev); } -void rtl8168_init_leds(struct net_device *ndev) +struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev) { - /* bind resource mgmt to netdev */ - struct device *dev = &ndev->dev; struct r8169_led_classdev *leds; int i; - leds = devm_kcalloc(dev, RTL8168_NUM_LEDS, sizeof(*leds), GFP_KERNEL); + leds = kcalloc(RTL8168_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL); if (!leds) - return; + return NULL; for (i = 0; i < RTL8168_NUM_LEDS; i++) rtl8168_setup_ldev(leds + i, ndev, i); + + return leds; } static int rtl8125_led_hw_control_is_supported(struct led_classdev *led_cdev, @@ -245,20 +245,31 @@ static void rtl8125_setup_led_ldev(struct r8169_led_classdev *ldev, led_cdev->hw_control_get_device = r8169_led_hw_control_get_device; /* ignore errors */ - devm_led_classdev_register(&ndev->dev, led_cdev); + led_classdev_register(&ndev->dev, led_cdev); } -void rtl8125_init_leds(struct net_device *ndev) +struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev) { - /* bind resource mgmt to netdev */ - struct device *dev = &ndev->dev; struct r8169_led_classdev *leds; int i; - leds = devm_kcalloc(dev, RTL8125_NUM_LEDS, sizeof(*leds), GFP_KERNEL); + leds = kcalloc(RTL8125_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL); if (!leds) - return; + return NULL; for (i = 0; i < RTL8125_NUM_LEDS; i++) rtl8125_setup_led_ldev(leds + i, ndev, i); + + return leds; +} + +void r8169_remove_leds(struct r8169_led_classdev *leds) +{ + if (!leds) + return; + + for (struct r8169_led_classdev *l = leds; l->ndev; l++) + led_classdev_unregister(&l->led); + + kfree(leds); } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5c879a5c86d7..0fc5fe564ae5 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -647,6 +647,8 @@ struct rtl8169_private { const char *fw_name; struct rtl_fw *rtl_fw; + struct r8169_led_classdev *leds; + u32 ocp_base; }; @@ -1314,17 +1316,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01); } +static void rtl_dash_loop_wait(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long usecs, int n, bool high) +{ + if (!tp->dash_enabled) + return; + rtl_loop_wait(tp, c, usecs, n, high); +} + +static void rtl_dash_loop_wait_high(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, true); +} + +static void rtl_dash_loop_wait_low(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, false); +} + static void rtl8168dp_driver_start(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START); - rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_start(struct rtl8169_private *tp) { r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); + rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); } static void rtl8168_driver_start(struct rtl8169_private *tp) @@ -1338,7 +1363,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp) static void rtl8168dp_driver_stop(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP); - rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_stop(struct rtl8169_private *tp) @@ -1346,7 +1371,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp) rtl8168ep_stop_cmac(tp); r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); } static void rtl8168_driver_stop(struct rtl8169_private *tp) @@ -5021,6 +5046,9 @@ static void rtl_remove_one(struct pci_dev *pdev) cancel_work_sync(&tp->wk.work); + if (IS_ENABLED(CONFIG_R8169_LEDS)) + r8169_remove_leds(tp->leds); + unregister_netdev(tp->dev); if (tp->dash_type != RTL_DASH_NONE) @@ -5141,6 +5169,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp) struct mii_bus *new_bus; int ret; + /* On some boards with this chip version the BIOS is buggy and misses + * to reset the PHY page selector. This results in the PHY ID read + * accessing registers on a different page, returning a more or + * less random value. Fix this by resetting the page selector first. + */ + if (tp->mac_version == RTL_GIGA_MAC_VER_25 || + tp->mac_version == RTL_GIGA_MAC_VER_26) + r8169_mdio_write(tp, 0x1f, 0); + new_bus = devm_mdiobus_alloc(&pdev->dev); if (!new_bus) return -ENOMEM; @@ -5469,9 +5506,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (IS_ENABLED(CONFIG_R8169_LEDS)) { if (rtl_is_8125(tp)) - rtl8125_init_leds(dev); + tp->leds = rtl8125_init_leds(dev); else if (tp->mac_version > RTL_GIGA_MAC_VER_06) - rtl8168_init_leds(dev); + tp->leds = rtl8168_init_leds(dev); } netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n", diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d1be030c8848..fcb756d77681 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -769,25 +769,28 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) dma_addr_t dma_addr; int rx_packets = 0; u8 desc_status; - u16 pkt_len; + u16 desc_len; u8 die_dt; int entry; int limit; int i; - entry = priv->cur_rx[q] % priv->num_rx_ring[q]; limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; stats = &priv->stats[q]; - desc = &priv->rx_ring[q].desc[entry]; - for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) { + for (i = 0; i < limit; i++, priv->cur_rx[q]++) { + entry = priv->cur_rx[q] % priv->num_rx_ring[q]; + desc = &priv->rx_ring[q].desc[entry]; + if (rx_packets == *quota || desc->die_dt == DT_FEMPTY) + break; + /* Descriptor type must be checked before all other reads */ dma_rmb(); desc_status = desc->msc; - pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS; + desc_len = le16_to_cpu(desc->ds_cc) & RX_DS; /* We use 0-byte descriptors to mark the DMA mapping errors */ - if (!pkt_len) + if (!desc_len) continue; if (desc_status & MSC_MC) @@ -808,25 +811,25 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) switch (die_dt) { case DT_FSINGLE: skb = ravb_get_skb_gbeth(ndev, entry, desc); - skb_put(skb, pkt_len); + skb_put(skb, desc_len); skb->protocol = eth_type_trans(skb, ndev); if (ndev->features & NETIF_F_RXCSUM) ravb_rx_csum_gbeth(skb); napi_gro_receive(&priv->napi[q], skb); rx_packets++; - stats->rx_bytes += pkt_len; + stats->rx_bytes += desc_len; break; case DT_FSTART: priv->rx_1st_skb = ravb_get_skb_gbeth(ndev, entry, desc); - skb_put(priv->rx_1st_skb, pkt_len); + skb_put(priv->rx_1st_skb, desc_len); break; case DT_FMID: skb = ravb_get_skb_gbeth(ndev, entry, desc); skb_copy_to_linear_data_offset(priv->rx_1st_skb, priv->rx_1st_skb->len, skb->data, - pkt_len); - skb_put(priv->rx_1st_skb, pkt_len); + desc_len); + skb_put(priv->rx_1st_skb, desc_len); dev_kfree_skb(skb); break; case DT_FEND: @@ -834,23 +837,20 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) skb_copy_to_linear_data_offset(priv->rx_1st_skb, priv->rx_1st_skb->len, skb->data, - pkt_len); - skb_put(priv->rx_1st_skb, pkt_len); + desc_len); + skb_put(priv->rx_1st_skb, desc_len); dev_kfree_skb(skb); priv->rx_1st_skb->protocol = eth_type_trans(priv->rx_1st_skb, ndev); if (ndev->features & NETIF_F_RXCSUM) - ravb_rx_csum_gbeth(skb); + ravb_rx_csum_gbeth(priv->rx_1st_skb); + stats->rx_bytes += priv->rx_1st_skb->len; napi_gro_receive(&priv->napi[q], priv->rx_1st_skb); rx_packets++; - stats->rx_bytes += pkt_len; break; } } - - entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; - desc = &priv->rx_ring[q].desc[entry]; } /* Refill the RX ring buffers. */ @@ -891,30 +891,29 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) { struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; - int entry = priv->cur_rx[q] % priv->num_rx_ring[q]; - int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) - - priv->cur_rx[q]; struct net_device_stats *stats = &priv->stats[q]; struct ravb_ex_rx_desc *desc; + unsigned int limit, i; struct sk_buff *skb; dma_addr_t dma_addr; struct timespec64 ts; + int rx_packets = 0; u8 desc_status; u16 pkt_len; - int limit; + int entry; + + limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; + for (i = 0; i < limit; i++, priv->cur_rx[q]++) { + entry = priv->cur_rx[q] % priv->num_rx_ring[q]; + desc = &priv->rx_ring[q].ex_desc[entry]; + if (rx_packets == *quota || desc->die_dt == DT_FEMPTY) + break; - boguscnt = min(boguscnt, *quota); - limit = boguscnt; - desc = &priv->rx_ring[q].ex_desc[entry]; - while (desc->die_dt != DT_FEMPTY) { /* Descriptor type must be checked before all other reads */ dma_rmb(); desc_status = desc->msc; pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS; - if (--boguscnt < 0) - break; - /* We use 0-byte descriptors to mark the DMA mapping errors */ if (!pkt_len) continue; @@ -960,12 +959,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) if (ndev->features & NETIF_F_RXCSUM) ravb_rx_csum(skb); napi_gro_receive(&priv->napi[q], skb); - stats->rx_packets++; + rx_packets++; stats->rx_bytes += pkt_len; } - - entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; - desc = &priv->rx_ring[q].ex_desc[entry]; } /* Refill the RX ring buffers. */ @@ -995,9 +991,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) desc->die_dt = DT_FEMPTY; } - *quota -= limit - (++boguscnt); - - return boguscnt <= 0; + stats->rx_packets += rx_packets; + *quota -= rx_packets; + return *quota == 0; } /* Packet receive function for Ethernet AVB */ @@ -1324,12 +1320,12 @@ static int ravb_poll(struct napi_struct *napi, int budget) int q = napi - priv->napi; int mask = BIT(q); int quota = budget; + bool unmask; /* Processing RX Descriptor Ring */ /* Clear RX interrupt */ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (ravb_rx(ndev, "a, q)) - goto out; + unmask = !ravb_rx(ndev, "a, q); /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); @@ -1339,6 +1335,18 @@ static int ravb_poll(struct napi_struct *napi, int budget) netif_wake_subqueue(ndev, q); spin_unlock_irqrestore(&priv->lock, flags); + /* Receive error message handling */ + priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; + if (info->nc_queues) + priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; + if (priv->rx_over_errors != ndev->stats.rx_over_errors) + ndev->stats.rx_over_errors = priv->rx_over_errors; + if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) + ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; + + if (!unmask) + goto out; + napi_complete(napi); /* Re-enable RX/TX interrupts */ @@ -1352,14 +1360,6 @@ static int ravb_poll(struct napi_struct *napi, int budget) } spin_unlock_irqrestore(&priv->lock, flags); - /* Receive error message handling */ - priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; - if (info->nc_queues) - priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; - if (priv->rx_over_errors != ndev->stats.rx_over_errors) - ndev->stats.rx_over_errors = priv->rx_over_errors; - if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) - ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; out: return budget - quota; } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 475e1e8c1d35..0786eb0da391 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -50,7 +50,7 @@ * the macros available to do this only define GCC 8. */ __diag_push(); -__diag_ignore(GCC, 8, "-Woverride-init", +__diag_ignore_all("-Woverride-init", "logic to initialize all and then override some is OK"); static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { SH_ETH_OFFSET_DEFAULTS, diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index a6fefe675ef1..3b7d4ac1e7be 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -553,6 +553,7 @@ extern const struct stmmac_hwtimestamp stmmac_ptp; extern const struct stmmac_mode_ops dwmac4_ring_mode_ops; struct mac_link { + u32 caps; u32 speed_mask; u32 speed10; u32 speed100; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index b21d99faa2d0..e1b761dcfa1d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1096,6 +1096,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) priv->dev->priv_flags |= IFF_UNICAST_FLT; + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000; /* The loopback bit seems to be re-set when link change * Simply mask it each time * Speed 10/100/1000 are set in BIT(2)/BIT(3) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 3927609abc44..8555299443f4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -539,6 +539,8 @@ int dwmac1000_setup(struct stmmac_priv *priv) if (mac->multicast_filter_bins) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000; mac->link.duplex = GMAC_CONTROL_DM; mac->link.speed10 = GMAC_CONTROL_PS; mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index a6e8d7bd9588..7667d103cd0e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -175,6 +175,8 @@ int dwmac100_setup(struct stmmac_priv *priv) dev_info(priv->device, "\tDWMAC100\n"); mac->pcsr = priv->ioaddr; + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100; mac->link.duplex = MAC_CONTROL_F; mac->link.speed10 = 0; mac->link.speed100 = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 6b6d0de09619..a38226d7cc6a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -70,7 +70,10 @@ static void dwmac4_core_init(struct mac_device_info *hw, static void dwmac4_phylink_get_caps(struct stmmac_priv *priv) { - priv->phylink_config.mac_capabilities |= MAC_2500FD; + if (priv->plat->tx_queues_to_use > 1) + priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD); + else + priv->hw->link.caps |= (MAC_10HD | MAC_100HD | MAC_1000HD); } static void dwmac4_rx_queue_enable(struct mac_device_info *hw, @@ -92,19 +95,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 base_register; - u32 value; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3; - if (queue >= 4) + ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3); + + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) & + GMAC_RXQCTRL_PSRQX_MASK(i)); + + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + GMAC_RXQCTRL_PSRQX_MASK(queue); + + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + } else { queue -= 4; - value = readl(ioaddr + base_register); - - value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue); - value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & GMAC_RXQCTRL_PSRQX_MASK(queue); - writel(value, ioaddr + base_register); + + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + } } static void dwmac4_tx_queue_priority(struct mac_device_info *hw, @@ -1356,6 +1381,8 @@ int dwmac4_setup(struct stmmac_priv *priv) if (mac->multicast_filter_bins) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD; mac->link.duplex = GMAC_CONFIG_DM; mac->link.speed10 = GMAC_CONFIG_PS; mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 1af2f89a0504..f8e7775bb633 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -47,14 +47,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN); } -static void xgmac_phylink_get_caps(struct stmmac_priv *priv) -{ - priv->phylink_config.mac_capabilities |= MAC_2500FD | MAC_5000FD | - MAC_10000FD | MAC_25000FD | - MAC_40000FD | MAC_50000FD | - MAC_100000FD; -} - static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable) { u32 tx = readl(ioaddr + XGMAC_TX_CONFIG); @@ -105,17 +97,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 value, reg; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3; - if (queue >= 4) + ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3); + + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) & + XGMAC_PSRQ(i)); + + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); + + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + } else { queue -= 4; - value = readl(ioaddr + reg); - value &= ~XGMAC_PSRQ(queue); - value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue); + ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); - writel(value, ioaddr + reg); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + } } static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio, @@ -1516,7 +1532,6 @@ static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg * const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, - .phylink_get_caps = xgmac_phylink_get_caps, .set_mac = dwxgmac2_set_mac, .rx_ipc = dwxgmac2_rx_ipc, .rx_queue_enable = dwxgmac2_rx_queue_enable, @@ -1577,7 +1592,6 @@ static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode, const struct stmmac_ops dwxlgmac2_ops = { .core_init = dwxgmac2_core_init, - .phylink_get_caps = xgmac_phylink_get_caps, .set_mac = dwxgmac2_set_mac, .rx_ipc = dwxgmac2_rx_ipc, .rx_queue_enable = dwxlgmac2_rx_queue_enable, @@ -1637,6 +1651,9 @@ int dwxgmac2_setup(struct stmmac_priv *priv) if (mac->multicast_filter_bins) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_1000FD | MAC_2500FD | MAC_5000FD | + MAC_10000FD; mac->link.duplex = 0; mac->link.speed10 = XGMAC_CONFIG_SS_10_MII; mac->link.speed100 = XGMAC_CONFIG_SS_100_MII; @@ -1674,6 +1691,11 @@ int dwxlgmac2_setup(struct stmmac_priv *priv) if (mac->multicast_filter_bins) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_1000FD | MAC_2500FD | MAC_5000FD | + MAC_10000FD | MAC_25000FD | + MAC_40000FD | MAC_50000FD | + MAC_100000FD; mac->link.duplex = 0; mac->link.speed1000 = XLGMAC_CONFIG_SS_1000; mac->link.speed2500 = XLGMAC_CONFIG_SS_2500; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h index dff02d75d519..5d1ea3e07459 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc.h +++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h @@ -52,6 +52,7 @@ struct stmmac_counters { unsigned int mmc_tx_excessdef; unsigned int mmc_tx_pause_frame; unsigned int mmc_tx_vlan_frame_g; + unsigned int mmc_tx_oversize_g; unsigned int mmc_tx_lpi_usec; unsigned int mmc_tx_lpi_tran; @@ -80,6 +81,7 @@ struct stmmac_counters { unsigned int mmc_rx_fifo_overflow; unsigned int mmc_rx_vlan_frames_gb; unsigned int mmc_rx_watchdog_error; + unsigned int mmc_rx_error; unsigned int mmc_rx_lpi_usec; unsigned int mmc_rx_lpi_tran; unsigned int mmc_rx_discard_frames_gb; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 7eb477faa75a..0fab842902a8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -53,6 +53,7 @@ #define MMC_TX_EXCESSDEF 0x6c #define MMC_TX_PAUSE_FRAME 0x70 #define MMC_TX_VLAN_FRAME_G 0x74 +#define MMC_TX_OVERSIZE_G 0x78 /* MMC RX counter registers */ #define MMC_RX_FRAMECOUNT_GB 0x80 @@ -79,6 +80,13 @@ #define MMC_RX_FIFO_OVERFLOW 0xd4 #define MMC_RX_VLAN_FRAMES_GB 0xd8 #define MMC_RX_WATCHDOG_ERROR 0xdc +#define MMC_RX_ERROR 0xe0 + +#define MMC_TX_LPI_USEC 0xec +#define MMC_TX_LPI_TRAN 0xf0 +#define MMC_RX_LPI_USEC 0xf4 +#define MMC_RX_LPI_TRAN 0xf8 + /* IPC*/ #define MMC_RX_IPC_INTR_MASK 0x100 #define MMC_RX_IPC_INTR 0x108 @@ -283,6 +291,9 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) mmc->mmc_tx_excessdef += readl(mmcaddr + MMC_TX_EXCESSDEF); mmc->mmc_tx_pause_frame += readl(mmcaddr + MMC_TX_PAUSE_FRAME); mmc->mmc_tx_vlan_frame_g += readl(mmcaddr + MMC_TX_VLAN_FRAME_G); + mmc->mmc_tx_oversize_g += readl(mmcaddr + MMC_TX_OVERSIZE_G); + mmc->mmc_tx_lpi_usec += readl(mmcaddr + MMC_TX_LPI_USEC); + mmc->mmc_tx_lpi_tran += readl(mmcaddr + MMC_TX_LPI_TRAN); /* MMC RX counter registers */ mmc->mmc_rx_framecount_gb += readl(mmcaddr + MMC_RX_FRAMECOUNT_GB); @@ -316,6 +327,10 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) mmc->mmc_rx_fifo_overflow += readl(mmcaddr + MMC_RX_FIFO_OVERFLOW); mmc->mmc_rx_vlan_frames_gb += readl(mmcaddr + MMC_RX_VLAN_FRAMES_GB); mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_RX_WATCHDOG_ERROR); + mmc->mmc_rx_error += readl(mmcaddr + MMC_RX_ERROR); + mmc->mmc_rx_lpi_usec += readl(mmcaddr + MMC_RX_LPI_USEC); + mmc->mmc_rx_lpi_tran += readl(mmcaddr + MMC_RX_LPI_TRAN); + /* IPv4 */ mmc->mmc_rx_ipv4_gd += readl(mmcaddr + MMC_RX_IPV4_GD); mmc->mmc_rx_ipv4_hderr += readl(mmcaddr + MMC_RX_IPV4_HDERR); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index e1537a57815f..542e2633a6f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -212,6 +212,7 @@ static const struct stmmac_stats stmmac_mmc[] = { STMMAC_MMC_STAT(mmc_tx_excessdef), STMMAC_MMC_STAT(mmc_tx_pause_frame), STMMAC_MMC_STAT(mmc_tx_vlan_frame_g), + STMMAC_MMC_STAT(mmc_tx_oversize_g), STMMAC_MMC_STAT(mmc_tx_lpi_usec), STMMAC_MMC_STAT(mmc_tx_lpi_tran), STMMAC_MMC_STAT(mmc_rx_framecount_gb), @@ -238,6 +239,7 @@ static const struct stmmac_stats stmmac_mmc[] = { STMMAC_MMC_STAT(mmc_rx_fifo_overflow), STMMAC_MMC_STAT(mmc_rx_vlan_frames_gb), STMMAC_MMC_STAT(mmc_rx_watchdog_error), + STMMAC_MMC_STAT(mmc_rx_error), STMMAC_MMC_STAT(mmc_rx_lpi_usec), STMMAC_MMC_STAT(mmc_rx_lpi_tran), STMMAC_MMC_STAT(mmc_rx_discard_frames_gb), diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 24cd80490d19..7c6fb14b5555 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1198,17 +1198,6 @@ static int stmmac_init_phy(struct net_device *dev) return ret; } -static void stmmac_set_half_duplex(struct stmmac_priv *priv) -{ - /* Half-Duplex can only work with single tx queue */ - if (priv->plat->tx_queues_to_use > 1) - priv->phylink_config.mac_capabilities &= - ~(MAC_10HD | MAC_100HD | MAC_1000HD); - else - priv->phylink_config.mac_capabilities |= - (MAC_10HD | MAC_100HD | MAC_1000HD); -} - static int stmmac_phy_setup(struct stmmac_priv *priv) { struct stmmac_mdio_bus_data *mdio_bus_data; @@ -1236,15 +1225,11 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) xpcs_get_interfaces(priv->hw->xpcs, priv->phylink_config.supported_interfaces); - priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | - MAC_10FD | MAC_100FD | - MAC_1000FD; - - stmmac_set_half_duplex(priv); - /* Get the MAC specific capabilities */ stmmac_mac_phylink_get_caps(priv); + priv->phylink_config.mac_capabilities = priv->hw->link.caps; + max_speed = priv->plat->max_speed; if (max_speed) phylink_limit_mac_speed(&priv->phylink_config, max_speed); @@ -7342,6 +7327,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) { struct stmmac_priv *priv = netdev_priv(dev); int ret = 0, i; + int max_speed; if (netif_running(dev)) stmmac_release(dev); @@ -7355,7 +7341,14 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) priv->rss.table[i] = ethtool_rxfh_indir_default(i, rx_cnt); - stmmac_set_half_duplex(priv); + stmmac_mac_phylink_get_caps(priv); + + priv->phylink_config.mac_capabilities = priv->hw->link.caps; + + max_speed = priv->plat->max_speed; + if (max_speed) + phylink_limit_mac_speed(&priv->phylink_config, max_speed); + stmmac_napi_add(dev); if (netif_running(dev)) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 2939a21ca74f..1d00e21808c1 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2793,6 +2793,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { + struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns; + struct am65_cpsw_tx_chn *tx_chan = common->tx_chns; struct device *dev = common->dev; struct am65_cpsw_port *port; int ret = 0, i; @@ -2805,6 +2807,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) if (ret) return ret; + /* The DMA Channels are not guaranteed to be in a clean state. + * Reset and disable them to ensure that they are back to the + * clean state and ready to be used. + */ + for (i = 0; i < common->tx_ch_num; i++) { + k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i], + am65_cpsw_nuss_tx_cleanup); + k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn); + } + + for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) + k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan, + am65_cpsw_nuss_rx_cleanup, !!i); + + k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); + ret = am65_cpsw_nuss_register_devlink(common); if (ret) return ret; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 5b5d5e4310d1..2fa511227eac 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -20,6 +20,8 @@ #include "txgbe_phy.h" #include "txgbe_hw.h" +#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw" + static int txgbe_swnodes_register(struct txgbe *txgbe) { struct txgbe_nodes *nodes = &txgbe->nodes; @@ -571,8 +573,8 @@ static int txgbe_clock_register(struct txgbe *txgbe) char clk_name[32]; struct clk *clk; - snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d", - pci_dev_id(pdev)); + snprintf(clk_name, sizeof(clk_name), "%s.%d", + TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev)); clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); if (IS_ERR(clk)) @@ -634,7 +636,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe) info.parent = &pdev->dev; info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]); - info.name = "i2c_designware"; + info.name = TXGBE_I2C_CLK_DEV_NAME; info.id = pci_dev_id(pdev); info.res = &DEFINE_RES_IRQ(pdev->irq); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 9df39cf8b097..1072e2210aed 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1443,7 +1443,7 @@ static int temac_probe(struct platform_device *pdev) } /* map device registers */ - lp->regs = devm_platform_ioremap_resource_byname(pdev, 0); + lp->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map TEMAC registers\n"); return -ENOMEM; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2f6739fe78af..6c2835086b57 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -822,7 +822,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; if (!gs4) @@ -929,7 +929,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; if (!gs6) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index a6fcbda64ecc..2b6ec979a62f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -154,8 +154,11 @@ static void free_netvsc_device(struct rcu_head *head) int i; kfree(nvdev->extension); - vfree(nvdev->recv_buf); - vfree(nvdev->send_buf); + + if (!nvdev->recv_buf_gpadl_handle.decrypted) + vfree(nvdev->recv_buf); + if (!nvdev->send_buf_gpadl_handle.decrypted) + vfree(nvdev->send_buf); bitmap_free(nvdev->send_section_map); for (i = 0; i < VRSS_CHANNEL_MAX; i++) { diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 8b8634600c51..ddb50a0e2bc8 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2431,6 +2431,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct lan8814_ptp_rx_ts *rx_ts, *tmp; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; + int tx_mod; ptp_priv->hwts_tx_type = config->tx_type; ptp_priv->rx_filter = config->rx_filter; @@ -2477,9 +2478,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable); lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable); - if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) + tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD); + if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) { lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, - PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) { + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, + tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + } if (config->rx_filter != HWTSTAMP_FILTER_NONE) lan8814_config_ts_intr(ptp_priv->phydev, true); @@ -2537,7 +2543,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts, } } -static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2547,7 +2553,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) ptp_header = ptp_parse_header(skb, type); skb_pull_inline(skb, ETH_HLEN); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, @@ -2559,7 +2569,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, bool ret = false; u16 skb_sig; - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + return ret; /* Iterate over all RX timestamps and match it with the received skbs */ spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); @@ -2834,7 +2845,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) return 0; } -static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2842,7 +2853,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) type = ptp_classify_raw(skb); ptp_header = ptp_parse_header(skb, type); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, @@ -2856,7 +2871,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) { - lan8814_get_sig_tx(skb, &skb_sig); + if (!lan8814_get_sig_tx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &seq_id, sizeof(seq_id))) continue; @@ -2910,7 +2926,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv, spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) { - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) continue; diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index 4717c59d51d0..e79657f76bea 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -797,7 +797,7 @@ static int at8031_parse_dt(struct phy_device *phydev) static int at8031_probe(struct phy_device *phydev) { - struct at803x_priv *priv = phydev->priv; + struct at803x_priv *priv; int mode_cfg; int ccr; int ret; @@ -806,6 +806,8 @@ static int at8031_probe(struct phy_device *phydev) if (ret) return ret; + priv = phydev->priv; + /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping * options. */ diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0b3f21cba552..92da8c03d960 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2125,14 +2125,16 @@ static ssize_t tun_put_user(struct tun_struct *tun, tun_is_little_endian(tun), true, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb); - pr_err("unexpected GSO type: " - "0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + + if (net_ratelimit()) { + netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), + tun16_to_cpu(tun, gso.hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + } WARN_ON_ONCE(1); return -EINVAL; } diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 88e084534853..752f821a1990 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev) if (is_valid_ether_addr(mac)) { eth_hw_addr_set(dev->net, mac); + if (!is_local_ether_addr(mac)) + dev->net->addr_assign_type = NET_ADDR_PERM; } else { netdev_info(dev->net, "invalid MAC address, using random\n"); eth_hw_addr_random(dev->net); @@ -1315,6 +1317,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) netif_set_tso_max_size(dev->net, 16384); + ax88179_reset(dev); + return 0; } @@ -1693,7 +1697,6 @@ static const struct driver_info ax88179_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1706,7 +1709,6 @@ static const struct driver_info ax88178a_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index e2e181378f41..edc34402e787 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1431,6 +1431,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ + {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c22d1118a133..115c3c5414f2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); + bool update = false; int i; if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && @@ -3814,13 +3815,28 @@ static int virtnet_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; if (rxfh->indir) { + if (!vi->has_rss) + return -EOPNOTSUPP; + for (i = 0; i < vi->rss_indir_table_size; ++i) vi->ctrl->rss.indirection_table[i] = rxfh->indir[i]; + update = true; } - if (rxfh->key) - memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size); - virtnet_commit_rss_command(vi); + if (rxfh->key) { + /* If either _F_HASH_REPORT or _F_RSS are negotiated, the + * device provides hash calculation capabilities, that is, + * hash_key is configured. + */ + if (!vi->has_rss && !vi->has_rss_hash_report) + return -EOPNOTSUPP; + + memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size); + update = true; + } + + if (update) + virtnet_commit_rss_command(vi); return 0; } @@ -4729,13 +4745,15 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; - if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { vi->has_rss = true; - if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_indir_table_size = virtio_cread16(vdev, offsetof(struct virtio_net_config, rss_max_indirection_table_length)); + } + + if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index db6d7013df66..c3bdf433d8f7 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -3081,8 +3081,6 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) struct iwl_fw_dbg_params params = {0}; struct iwl_fwrt_dump_data *dump_data = &fwrt->dump.wks[wk_idx].dump_data; - u32 policy; - u32 time_point; if (!test_bit(wk_idx, &fwrt->dump.active_wks)) return; @@ -3113,13 +3111,16 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, false); - policy = le32_to_cpu(dump_data->trig->apply_policy); - time_point = le32_to_cpu(dump_data->trig->time_point); + if (iwl_trans_dbg_ini_valid(fwrt->trans)) { + u32 policy = le32_to_cpu(dump_data->trig->apply_policy); + u32 time_point = le32_to_cpu(dump_data->trig->time_point); - if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { - IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); - iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { + IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); + iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + } } + if (fwrt->trans->dbg.last_tp_resetfw == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) iwl_force_nmi(fwrt->trans); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 553c6fffc7c6..52518a47554e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1260,15 +1260,15 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, if (IS_ERR_OR_NULL(vif)) return 1; - if (ieee80211_vif_is_mld(vif) && vif->cfg.assoc) { + if (hweight16(vif->active_links) > 1) { /* - * Select the 'best' link. May need to revisit, it seems - * better to not optimize for throughput but rather range, - * reliability and power here - and select 2.4 GHz ... + * Select the 'best' link. + * May need to revisit, it seems better to not optimize + * for throughput but rather range, reliability and + * power here - and select 2.4 GHz ... */ - primary_link = - iwl_mvm_mld_get_primary_link(mvm, vif, - vif->active_links); + primary_link = iwl_mvm_mld_get_primary_link(mvm, vif, + vif->active_links); if (WARN_ONCE(primary_link < 0, "no primary link in 0x%x\n", vif->active_links)) @@ -1277,6 +1277,8 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, ret = ieee80211_set_active_links(vif, BIT(primary_link)); if (ret) return ret; + } else if (vif->active_links) { + primary_link = __ffs(vif->active_links); } else { primary_link = 0; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 51b01f7528be..7fe57ecd0682 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -748,7 +748,9 @@ void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct dentry *dbgfs_dir = vif->debugfs_dir; struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - char buf[100]; + char buf[3 * 3 + 11 + (NL80211_WIPHY_NAME_MAXLEN + 1) + + (7 + IFNAMSIZ + 1) + 6 + 1]; + char name[7 + IFNAMSIZ + 1]; /* this will happen in monitor mode */ if (!dbgfs_dir) @@ -761,10 +763,11 @@ void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * find * netdev:wlan0 -> ../../../ieee80211/phy0/netdev:wlan0/iwlmvm/ */ - snprintf(buf, 100, "../../../%pd3/iwlmvm", dbgfs_dir); + snprintf(name, sizeof(name), "%pd", dbgfs_dir); + snprintf(buf, sizeof(buf), "../../../%pd3/iwlmvm", dbgfs_dir); - mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name, - mvm->debugfs_dir, buf); + mvmvif->dbgfs_slink = + debugfs_create_symlink(name, mvm->debugfs_dir, buf); } void iwl_mvm_vif_dbgfs_rm_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c index f13f13e6b71a..9f69e04594e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c @@ -46,6 +46,27 @@ static int iwl_mvm_link_cmd_send(struct iwl_mvm *mvm, return ret; } +int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm_vif_link_info *link_info = + mvmvif->link[link_conf->link_id]; + + if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) { + link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm, + mvmvif); + if (link_info->fw_link_id >= + ARRAY_SIZE(mvm->link_id_to_link_conf)) + return -EINVAL; + + rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id], + link_conf); + } + + return 0; +} + int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { @@ -55,19 +76,14 @@ int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_link_config_cmd cmd = {}; unsigned int cmd_id = WIDE_ID(MAC_CONF_GROUP, LINK_CONFIG_CMD); u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 1); + int ret; if (WARN_ON_ONCE(!link_info)) return -EINVAL; - if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) { - link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm, - mvmvif); - if (link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf)) - return -EINVAL; - - rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id], - link_conf); - } + ret = iwl_mvm_set_link_mapping(mvm, vif, link_conf); + if (ret) + return ret; /* Update SF - Disable if needed. if this fails, SF might still be on * while many macs are bound, which is forbidden - so fail the binding. @@ -248,6 +264,24 @@ send_cmd: return ret; } +int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm_vif_link_info *link_info = + mvmvif->link[link_conf->link_id]; + + /* mac80211 thought we have the link, but it was never configured */ + if (WARN_ON(!link_info || + link_info->fw_link_id >= + ARRAY_SIZE(mvm->link_id_to_link_conf))) + return -EINVAL; + + RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id], + NULL); + return 0; +} + int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { @@ -257,13 +291,10 @@ int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_link_config_cmd cmd = {}; int ret; - /* mac80211 thought we have the link, but it was never configured */ - if (WARN_ON(!link_info || - link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf))) + ret = iwl_mvm_unset_link_mapping(mvm, vif, link_conf); + if (ret) return 0; - RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id], - NULL); cmd.link_id = cpu_to_le32(link_info->fw_link_id); iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id); link_info->fw_link_id = IWL_MVM_FW_LINK_ID_INVALID; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 1935630d3def..8f4b063d6243 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -360,7 +360,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) if (mvm->mld_api_is_used && mvm->nvm_data->sku_cap_11be_enable && !iwlwifi_mod_params.disable_11ax && !iwlwifi_mod_params.disable_11be) - hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_MLO; + hw->wiphy->flags |= WIPHY_FLAG_DISABLE_WEXT; /* With MLD FW API, it tracks timing by itself, * no need for any timing from the host @@ -1577,8 +1577,14 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, mvmvif->mvm = mvm; /* the first link always points to the default one */ + mvmvif->deflink.fw_link_id = IWL_MVM_FW_LINK_ID_INVALID; + mvmvif->deflink.active = 0; mvmvif->link[0] = &mvmvif->deflink; + ret = iwl_mvm_set_link_mapping(mvm, vif, &vif->bss_conf); + if (ret) + goto out; + /* * Not much to do here. The stack will not allow interface * types or combinations that we didn't advertise, so we @@ -1783,6 +1789,7 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, mvm->p2p_device_vif = NULL; } + iwl_mvm_unset_link_mapping(mvm, vif, &vif->bss_conf); iwl_mvm_mac_ctxt_remove(mvm, vif); RCU_INIT_POINTER(mvm->vif_id_to_mac[mvmvif->id], NULL); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c index 1628bf55458f..23e64a757cfe 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c @@ -855,10 +855,15 @@ int iwl_mvm_mld_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, int iwl_mvm_mld_rm_sta_id(struct iwl_mvm *mvm, u8 sta_id) { - int ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id); + int ret; lockdep_assert_held(&mvm->mutex); + if (WARN_ON(sta_id == IWL_MVM_INVALID_STA)) + return 0; + + ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id); + RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL); RCU_INIT_POINTER(mvm->fw_id_to_link_sta[sta_id], NULL); return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 44571114fb15..f0b24f00938b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1916,11 +1916,15 @@ int iwl_mvm_binding_remove_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif); u32 iwl_mvm_get_lmac_id(struct iwl_mvm *mvm, enum nl80211_band band); /* Links */ +int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, u32 changes, bool active); +int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); int iwl_mvm_disable_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c index 2ecd32bed752..045c862a8fc4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c @@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm) if (ret) return ERR_PTR(ret); - if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size)) + if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != + resp_size)) { + iwl_free_resp(&cmd); return ERR_PTR(-EIO); + } resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL); + iwl_free_resp(&cmd); + if (!resp) return ERR_PTR(-ENOMEM); - iwl_free_resp(&cmd); return resp; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 1484eaedf452..ce8d83c771a7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm, struct napi_struct *napi, struct sk_buff *skb, int queue, - struct ieee80211_sta *sta, - struct ieee80211_link_sta *link_sta) + struct ieee80211_sta *sta) { if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) { kfree_skb(skb); return; } - if (sta && sta->valid_links && link_sta) { - struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); - - rx_status->link_valid = 1; - rx_status->link_id = link_sta->link_id; - } - ieee80211_rx_napi(mvm->hw, sta, skb, napi); } @@ -588,7 +580,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, while ((skb = __skb_dequeue(skb_list))) { iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, reorder_buf->queue, - sta, NULL /* FIXME */); + sta); reorder_buf->num_stored--; } } @@ -2213,6 +2205,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, if (IS_ERR(sta)) sta = NULL; link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]); + + if (sta && sta->valid_links && link_sta) { + rx_status->link_valid = 1; + rx_status->link_id = link_sta->link_id; + } } } else if (!is_multicast_ether_addr(hdr->addr2)) { /* @@ -2356,8 +2353,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, !(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) rx_status->flag |= RX_FLAG_AMSDU_MORE; - iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, - link_sta); + iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta); } out: rcu_read_unlock(); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index a59d264a11c5..ad960faceb0d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -879,9 +879,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm_session_prot_notif *notif = (void *)pkt->data; unsigned int ver = - iwl_fw_lookup_cmd_ver(mvm->fw, - WIDE_ID(MAC_CONF_GROUP, - SESSION_PROTECTION_CMD), 2); + iwl_fw_lookup_notif_ver(mvm->fw, MAC_CONF_GROUP, + SESSION_PROTECTION_NOTIF, 2); int id = le32_to_cpu(notif->mac_link_id); struct ieee80211_vif *vif; struct iwl_mvm_vif *mvmvif; diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c index 33973a60d0bf..6229c785c845 100644 --- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c @@ -1589,9 +1589,9 @@ void iwl_txq_reclaim(struct iwl_trans *trans, int txq_id, int ssn, return; tfd_num = iwl_txq_get_cmd_index(txq, ssn); - read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr); spin_lock_bh(&txq->lock); + read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr); if (!test_bit(txq_id, trans->txqs.queue_used)) { IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n", diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index 367459bd1345..708132d5be2a 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -2233,7 +2233,7 @@ static void rtw8922a_btc_init_cfg(struct rtw89_dev *rtwdev) * Shared-Ant && BTG-path:WL mask(0x55f), others:WL THRU(0x5ff) */ if (btc->ant_type == BTC_ANT_SHARED && btc->btg_pos == path) - rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff); + rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x55f); else rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff); diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c index 9f43f256db1d..f0a4783baf1f 100644 --- a/drivers/net/wwan/t7xx/t7xx_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_cldma.c @@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno) { u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE; - return ioread64(hw_info->ap_pdn_base + offset); + return ioread64_lo_hi(hw_info->ap_pdn_base + offset); } void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address, @@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 : hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0; - iowrite64(address, reg + offset); + iowrite64_lo_hi(address, reg + offset); } void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno, diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c index abc41a7089fa..97163e1e5783 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c @@ -137,8 +137,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool return -ENODEV; } - gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base + + REG_CLDMA_DL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100) return 0; @@ -316,8 +317,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue) struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info; /* Check current processing TGPD, 64-bit address is in a table by Q index */ - ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr != ul_curr_addr) { spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n", diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c index 76da4c15e3de..f071ec7ff23d 100644 --- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c @@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_ for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) { offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i; reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; - iowrite64(0, reg); + iowrite64_lo_hi(0, reg); } } @@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_ reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset; value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT; - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset; iowrite32(cfg->trsl_id, reg); reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0); - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); /* Ensure ATR is set */ - ioread64(reg); + ioread64_lo_hi(reg); return 0; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ad29f370034e..8d2aee88526c 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) return NULL; } skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); + skb_mark_for_recycle(skb); /* Align ip header to a 16 bytes boundary */ skb_reserve(skb, NET_IP_ALIGN); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 943d72bdd794..27281a9a8951 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2076,6 +2076,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT; struct queue_limits lim; struct nvme_id_ns_nvm *nvm = NULL; + struct nvme_zone_info zi = {}; struct nvme_id_ns *id; sector_t capacity; unsigned lbaf; @@ -2088,9 +2089,10 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (id->ncap == 0) { /* namespace not allocated or attached */ info->is_removed = true; - ret = -ENODEV; + ret = -ENXIO; goto out; } + lbaf = nvme_lbaf_index(id->flbas); if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) { ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm); @@ -2098,8 +2100,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, goto out; } + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + ns->head->ids.csi == NVME_CSI_ZNS) { + ret = nvme_query_zone_info(ns, lbaf, &zi); + if (ret < 0) + goto out; + } + blk_mq_freeze_queue(ns->disk->queue); - lbaf = nvme_lbaf_index(id->flbas); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); @@ -2112,13 +2120,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, capacity = 0; nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - ns->head->ids.csi == NVME_CSI_ZNS) { - ret = nvme_update_zone_info(ns, lbaf, &lim); - if (ret) { - blk_mq_unfreeze_queue(ns->disk->queue); - goto out; - } - } + ns->head->ids.csi == NVME_CSI_ZNS) + nvme_update_zone_info(ns, &lim, &zi); ret = queue_limits_commit_update(ns->disk->queue, &lim); if (ret) { blk_mq_unfreeze_queue(ns->disk->queue); @@ -2201,6 +2204,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) } if (!ret && nvme_ns_head_multipath(ns->head)) { + struct queue_limits *ns_lim = &ns->disk->queue->limits; struct queue_limits lim; blk_mq_freeze_queue(ns->head->disk->queue); @@ -2212,7 +2216,26 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); + /* + * queue_limits mixes values that are the hardware limitations + * for bio splitting with what is the device configuration. + * + * For NVMe the device configuration can change after e.g. a + * Format command, and we really want to pick up the new format + * value here. But we must still stack the queue limits to the + * least common denominator for multipathing to split the bios + * properly. + * + * To work around this, we explicitly set the device + * configuration to those that we just queried, but only stack + * the splitting limits in to make sure we still obey possibly + * lower limitations of other controllers. + */ lim = queue_limits_start_update(ns->head->disk->queue); + lim.logical_block_size = ns_lim->logical_block_size; + lim.physical_block_size = ns_lim->physical_block_size; + lim.io_min = ns_lim->io_min; + lim.io_opt = ns_lim->io_opt; queue_limits_stack_bdev(&lim, ns->disk->part0, 0, ns->head->disk->disk_name); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 68a5d971657b..a5b29e9ad342 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2428,7 +2428,7 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) * controller. Called after last nvme_put_ctrl() call */ static void -nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) +nvme_fc_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); @@ -3384,7 +3384,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, - .free_ctrl = nvme_fc_nvme_ctrl_freed, + .free_ctrl = nvme_fc_free_ctrl, .submit_async_event = nvme_fc_submit_async_event, .delete_ctrl = nvme_fc_delete_ctrl, .get_address = nvmf_get_address, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 24193fcb8bd5..d0ed64dc7380 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1036,10 +1036,18 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk) } #endif /* CONFIG_NVME_MULTIPATH */ +struct nvme_zone_info { + u64 zone_size; + unsigned int max_open_zones; + unsigned int max_active_zones; +}; + int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); -int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, - struct queue_limits *lim); +int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf, + struct nvme_zone_info *zi); +void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, + struct nvme_zone_info *zi); #ifdef CONFIG_BLK_DEV_ZONED blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd, diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 722384bcc765..77aa0f440a6d 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -35,8 +35,8 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl) return 0; } -int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, - struct queue_limits *lim) +int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf, + struct nvme_zone_info *zi) { struct nvme_effects_log *log = ns->head->effects; struct nvme_command c = { }; @@ -89,27 +89,34 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, goto free_data; } - ns->head->zsze = - nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze)); - if (!is_power_of_2(ns->head->zsze)) { + zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze); + if (!is_power_of_2(zi->zone_size)) { dev_warn(ns->ctrl->device, - "invalid zone size:%llu for namespace:%u\n", - ns->head->zsze, ns->head->ns_id); + "invalid zone size: %llu for namespace: %u\n", + zi->zone_size, ns->head->ns_id); status = -ENODEV; goto free_data; } + zi->max_open_zones = le32_to_cpu(id->mor) + 1; + zi->max_active_zones = le32_to_cpu(id->mar) + 1; - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue); - lim->zoned = 1; - lim->max_open_zones = le32_to_cpu(id->mor) + 1; - lim->max_active_zones = le32_to_cpu(id->mar) + 1; - lim->chunk_sectors = ns->head->zsze; - lim->max_zone_append_sectors = ns->ctrl->max_zone_append; free_data: kfree(id); return status; } +void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, + struct nvme_zone_info *zi) +{ + lim->zoned = 1; + lim->max_open_zones = zi->max_open_zones; + lim->max_active_zones = zi->max_active_zones; + lim->max_zone_append_sectors = ns->ctrl->max_zone_append; + lim->chunk_sectors = ns->head->zsze = + nvme_lba_to_sect(ns->head, zi->zone_size); + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue); +} + static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, unsigned int nr_zones, size_t *buflen) { diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 77a6e817b315..a2325330bf22 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1613,6 +1613,11 @@ static struct config_group *nvmet_subsys_make(struct config_group *group, return ERR_PTR(-EINVAL); } + if (sysfs_streq(name, nvmet_disc_subsys->subsysnqn)) { + pr_err("can't create subsystem using unique discovery NQN\n"); + return ERR_PTR(-EINVAL); + } + subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME); if (IS_ERR(subsys)) return ERR_CAST(subsys); @@ -2159,7 +2164,49 @@ static const struct config_item_type nvmet_hosts_type = { static struct config_group nvmet_hosts_group; +static ssize_t nvmet_root_discovery_nqn_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn); +} + +static ssize_t nvmet_root_discovery_nqn_store(struct config_item *item, + const char *page, size_t count) +{ + struct list_head *entry; + size_t len; + + len = strcspn(page, "\n"); + if (!len || len > NVMF_NQN_FIELD_LEN - 1) + return -EINVAL; + + down_write(&nvmet_config_sem); + list_for_each(entry, &nvmet_subsystems_group.cg_children) { + struct config_item *item = + container_of(entry, struct config_item, ci_entry); + + if (!strncmp(config_item_name(item), page, len)) { + pr_err("duplicate NQN %s\n", config_item_name(item)); + up_write(&nvmet_config_sem); + return -EINVAL; + } + } + memset(nvmet_disc_subsys->subsysnqn, 0, NVMF_NQN_FIELD_LEN); + memcpy(nvmet_disc_subsys->subsysnqn, page, len); + up_write(&nvmet_config_sem); + + return len; +} + +CONFIGFS_ATTR(nvmet_root_, discovery_nqn); + +static struct configfs_attribute *nvmet_root_attrs[] = { + &nvmet_root_attr_discovery_nqn, + NULL, +}; + static const struct config_item_type nvmet_root_type = { + .ct_attrs = nvmet_root_attrs, .ct_owner = THIS_MODULE, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6bbe4df0166c..8860a3eb71ec 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1541,6 +1541,13 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, } down_read(&nvmet_config_sem); + if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn, + NVMF_NQN_SIZE)) { + if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) { + up_read(&nvmet_config_sem); + return nvmet_disc_subsys; + } + } list_for_each_entry(p, &port->subsystems, entry) { if (!strncmp(p->subsys->subsysnqn, subsysnqn, NVMF_NQN_SIZE)) { diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index fd229f310c93..337ee1cb09ae 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1115,16 +1115,21 @@ nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) } static bool -nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id) +nvmet_fc_assoc_exists(struct nvmet_fc_tgtport *tgtport, u64 association_id) { struct nvmet_fc_tgt_assoc *a; + bool found = false; + rcu_read_lock(); list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) { - if (association_id == a->association_id) - return true; + if (association_id == a->association_id) { + found = true; + break; + } } + rcu_read_unlock(); - return false; + return found; } static struct nvmet_fc_tgt_assoc * @@ -1164,13 +1169,11 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) ran = ran << BYTES_FOR_QID_SHIFT; spin_lock_irqsave(&tgtport->lock, flags); - rcu_read_lock(); - if (!nvmet_fc_assoc_exits(tgtport, ran)) { + if (!nvmet_fc_assoc_exists(tgtport, ran)) { assoc->association_id = ran; list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list); done = true; } - rcu_read_unlock(); spin_unlock_irqrestore(&tgtport->lock, flags); } while (!done); diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 3bf27052832f..4d57a4e34105 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "OF: " fmt +#include #include #include #include @@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs) { struct of_changeset_entry *ce, *cen; + /* + * When a device is deleted, the device links to/from it are also queued + * for deletion. Until these device links are freed, the devices + * themselves aren't freed. If the device being deleted is due to an + * overlay change, this device might be holding a reference to a device + * node that will be freed. So, wait until all already pending device + * links are deleted before freeing a device node. This ensures we don't + * free any device node that has a non-zero reference count. + */ + device_link_wait_removal(); + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) __of_changeset_entry_destroy(ce); } diff --git a/drivers/of/module.c b/drivers/of/module.c index 0e8aa974f0f2..f58e624953a2 100644 --- a/drivers/of/module.c +++ b/drivers/of/module.c @@ -16,6 +16,14 @@ ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len) ssize_t csize; ssize_t tsize; + /* + * Prevent a kernel oops in vsnprintf() -- it only allows passing a + * NULL ptr when the length is also 0. Also filter out the negative + * lengths... + */ + if ((len > 0 && !str) || len < 0) + return -EINVAL; + /* Name & Type */ /* %p eats all alphanum characters, so %c must be used here */ csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T', diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index bf4833221816..eff7f5df08e2 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3765,14 +3765,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset); */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset); -/* - * Apparently the LSI / Agere FW643 can't recover after a Secondary Bus - * Reset and requires a power-off or suspend/resume and rescan. Prevent - * use of that reset. - */ -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5900, quirk_no_bus_reset); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5901, quirk_no_bus_reset); - /* * Some TI KeyStone C667X devices do not support bus/hot reset. The PCIESS * automatically disables LTSSM when Secondary Bus Reset is received and diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index c78a6fd6c57f..b4efdddb2ad9 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -313,6 +313,10 @@ static int riscv_pmu_event_init(struct perf_event *event) u64 event_config = 0; uint64_t cmask; + /* driver does not support branch stack sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + hwc->flags = 0; mapped_event = rvpmu->event_map(event, &event_config); if (mapped_event < 0) { diff --git a/drivers/pinctrl/aspeed/Makefile b/drivers/pinctrl/aspeed/Makefile index 489ea1778353..db2a7600ae2b 100644 --- a/drivers/pinctrl/aspeed/Makefile +++ b/drivers/pinctrl/aspeed/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only # Aspeed pinctrl support -ccflags-y += $(call cc-option,-Woverride-init) +ccflags-y += -Woverride-init obj-$(CONFIG_PINCTRL_ASPEED) += pinctrl-aspeed.o pinmux-aspeed.o obj-$(CONFIG_PINCTRL_ASPEED_G4) += pinctrl-aspeed-g4.o obj-$(CONFIG_PINCTRL_ASPEED_G5) += pinctrl-aspeed-g5.o diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 49f89b70dcec..7f66ec73199a 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev) } ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler, - IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev); + IRQF_SHARED | IRQF_COND_ONESHOT, KBUILD_MODNAME, gpio_dev); if (ret) goto out2; diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c index 8ea867c2a01a..62bc24f6dcc7 100644 --- a/drivers/platform/chrome/cros_ec_uart.c +++ b/drivers/platform/chrome/cros_ec_uart.c @@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev) if (!ec_dev) return -ENOMEM; - ret = devm_serdev_device_open(dev, serdev); - if (ret) { - dev_err(dev, "Unable to open UART device"); - return ret; - } - serdev_device_set_drvdata(serdev, ec_dev); init_waitqueue_head(&ec_uart->response.wait_queue); @@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev) return ret; } - ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate); - if (ret < 0) { - dev_err(dev, "Failed to set up host baud rate (%d)", ret); - return ret; - } - - serdev_device_set_flow_control(serdev, ec_uart->flowcontrol); - /* Initialize ec_dev for cros_ec */ ec_dev->phys_name = dev_name(dev); ec_dev->dev = dev; @@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev) serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops); + ret = devm_serdev_device_open(dev, serdev); + if (ret) { + dev_err(dev, "Unable to open UART device"); + return ret; + } + + ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate); + if (ret < 0) { + dev_err(dev, "Failed to set up host baud rate (%d)", ret); + return ret; + } + + serdev_device_set_flow_control(serdev, ec_uart->flowcontrol); + return cros_ec_register(ec_dev); } diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index ee2e164f86b9..38c932df6446 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -597,6 +597,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = { }, .driver_data = &quirk_acer_predator_v4, }, + { + .callback = dmi_matched, + .ident = "Acer Predator PH18-71", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Predator PH18-71"), + }, + .driver_data = &quirk_acer_predator_v4, + }, { .callback = set_force_caps, .ident = "Acer Aspire Switch 10E SW3-016", diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index b456370166b6..b4f49720c87f 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -208,6 +208,15 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_BIOS_VERSION, "03.03"), } }, + { + .ident = "Framework Laptop 13 (Phoenix)", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Framework"), + DMI_MATCH(DMI_PRODUCT_NAME, "Laptop 13 (AMD Ryzen 7040Series)"), + DMI_MATCH(DMI_BIOS_VERSION, "03.05"), + } + }, {} }; diff --git a/drivers/platform/x86/amd/pmf/Makefile b/drivers/platform/x86/amd/pmf/Makefile index 6b26e48ce8ad..7d6079b02589 100644 --- a/drivers/platform/x86/amd/pmf/Makefile +++ b/drivers/platform/x86/amd/pmf/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_AMD_PMF) += amd-pmf.o amd-pmf-objs := core.o acpi.o sps.o \ auto-mode.o cnqf.o \ - tee-if.o spc.o + tee-if.o spc.o pmf-quirks.o diff --git a/drivers/platform/x86/amd/pmf/acpi.c b/drivers/platform/x86/amd/pmf/acpi.c index d0cf46e2fc8e..1157ec148880 100644 --- a/drivers/platform/x86/amd/pmf/acpi.c +++ b/drivers/platform/x86/amd/pmf/acpi.c @@ -343,7 +343,10 @@ static int apmf_if_verify_interface(struct amd_pmf_dev *pdev) if (err) return err; - pdev->supported_func = output.supported_functions; + /* only set if not already set by a quirk */ + if (!pdev->supported_func) + pdev->supported_func = output.supported_functions; + dev_dbg(pdev->dev, "supported functions:0x%x notifications:0x%x version:%u\n", output.supported_functions, output.notification_mask, output.version); @@ -437,7 +440,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev) status = acpi_walk_resources(ahandle, METHOD_NAME__CRS, apmf_walk_resources, pmf_dev); if (ACPI_FAILURE(status)) { - dev_err(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status); + dev_dbg(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status); return -EINVAL; } diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 5d4f80698a8b..64e6e34a2a9a 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -445,6 +445,7 @@ static int amd_pmf_probe(struct platform_device *pdev) mutex_init(&dev->lock); mutex_init(&dev->update_mutex); + amd_pmf_quirks_init(dev); apmf_acpi_init(dev); platform_set_drvdata(pdev, dev); amd_pmf_dbgfs_register(dev); diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c new file mode 100644 index 000000000000..0b2eb0ae85fe --- /dev/null +++ b/drivers/platform/x86/amd/pmf/pmf-quirks.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Platform Management Framework Driver Quirks + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Mario Limonciello + */ + +#include + +#include "pmf.h" + +struct quirk_entry { + u32 supported_func; +}; + +static struct quirk_entry quirk_no_sps_bug = { + .supported_func = 0x4003, +}; + +static const struct dmi_system_id fwbug_list[] = { + { + .ident = "ROG Zephyrus G14", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "GA403UV"), + }, + .driver_data = &quirk_no_sps_bug, + }, + {} +}; + +void amd_pmf_quirks_init(struct amd_pmf_dev *dev) +{ + const struct dmi_system_id *dmi_id; + struct quirk_entry *quirks; + + dmi_id = dmi_first_match(fwbug_list); + if (!dmi_id) + return; + + quirks = dmi_id->driver_data; + if (quirks->supported_func) { + dev->supported_func = quirks->supported_func; + pr_info("Using supported funcs quirk to avoid %s platform firmware bug\n", + dmi_id->ident); + } +} + diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h index 8c4df5753f40..eeedd0c0395a 100644 --- a/drivers/platform/x86/amd/pmf/pmf.h +++ b/drivers/platform/x86/amd/pmf/pmf.h @@ -720,4 +720,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev); void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in); void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in); +/* Quirk infrastructure */ +void amd_pmf_quirks_init(struct amd_pmf_dev *dev); + #endif /* PMF_H */ diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 7457ca2b27a6..c7a827645864 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -49,6 +49,8 @@ static const struct acpi_device_id intel_hid_ids[] = { {"INTC1076", 0}, {"INTC1077", 0}, {"INTC1078", 0}, + {"INTC107B", 0}, + {"INTC10CB", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); @@ -504,6 +506,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) struct platform_device *device = context; struct intel_hid_priv *priv = dev_get_drvdata(&device->dev); unsigned long long ev_index; + struct key_entry *ke; int err; /* @@ -545,11 +548,15 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) if (event == 0xc0 || !priv->array) return; - if (!sparse_keymap_entry_from_scancode(priv->array, event)) { + ke = sparse_keymap_entry_from_scancode(priv->array, event); + if (!ke) { dev_info(&device->dev, "unknown event 0x%x\n", event); return; } + if (ke->type == KE_IGNORE) + return; + wakeup: pm_wakeup_hard_event(&device->dev); diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c index 08df9494603c..30951f7131cd 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c @@ -719,6 +719,7 @@ static struct miscdevice isst_if_char_driver = { }; static const struct x86_cpu_id hpm_cpu_ids[] = { + X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, NULL), X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, NULL), X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, NULL), {} diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index bd75d61ff8a6..ef730200a04b 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -29,7 +29,7 @@ #include "uncore-frequency-common.h" #define UNCORE_MAJOR_VERSION 0 -#define UNCORE_MINOR_VERSION 1 +#define UNCORE_MINOR_VERSION 2 #define UNCORE_HEADER_INDEX 0 #define UNCORE_FABRIC_CLUSTER_OFFSET 8 @@ -329,7 +329,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ goto remove_clusters; } - if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) != UNCORE_MINOR_VERSION) + if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) > UNCORE_MINOR_VERSION) dev_info(&auxdev->dev, "Uncore: Ignore: Unsupported minor version:%lx\n", TPMI_MINOR_VERSION(pd_info->ufs_header_ver)); diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 084c355c86f5..79bb2c801daa 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -136,8 +136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device) priv->switches_dev->id.bustype = BUS_HOST; if (priv->has_switches) { - detect_tablet_mode(&device->dev); - ret = input_register_device(priv->switches_dev); if (ret) return ret; @@ -258,9 +256,6 @@ static const struct dmi_system_id dmi_switches_allow_list[] = { static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel) { - unsigned long long vgbs; - acpi_status status; - /* See dual_accel_detect.h for more info */ if (dual_accel) return false; @@ -268,8 +263,7 @@ static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel) if (!dmi_check_system(dmi_switches_allow_list)) return false; - status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs); - return ACPI_SUCCESS(status); + return acpi_has_method(handle, "VGBS"); } static int intel_vbtn_probe(struct platform_device *device) @@ -316,6 +310,9 @@ static int intel_vbtn_probe(struct platform_device *device) if (ACPI_FAILURE(status)) dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status); } + // Check switches after buttons since VBDL may have side effects. + if (has_switches) + detect_tablet_mode(&device->dev); device_init_wakeup(&device->dev, true); /* diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index ad3c39e9e9f5..e714ee6298dd 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -736,7 +736,7 @@ static int acpi_add(struct acpi_device *device) default: year = 2019; } - pr_info("product: %s year: %d\n", product, year); + pr_info("product: %s year: %d\n", product ?: "unknown", year); if (year >= 2019) battery_limit_use_wmbb = 1; diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 291f14ef6702..77244c9aa60d 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -264,6 +264,7 @@ static const struct key_entry toshiba_acpi_keymap[] = { { KE_KEY, 0xb32, { KEY_NEXTSONG } }, { KE_KEY, 0xb33, { KEY_PLAYPAUSE } }, { KE_KEY, 0xb5a, { KEY_MEDIA } }, + { KE_IGNORE, 0x0e00, { KEY_RESERVED } }, /* Wake from sleep */ { KE_IGNORE, 0x1430, { KEY_RESERVED } }, /* Wake from sleep */ { KE_IGNORE, 0x1501, { KEY_RESERVED } }, /* Output changed */ { KE_IGNORE, 0x1502, { KEY_RESERVED } }, /* HDMI plugged/unplugged */ @@ -3523,9 +3524,10 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event) (dev->kbd_mode == SCI_KBD_MODE_ON) ? LED_FULL : LED_OFF); break; + case 0x8e: /* Power button pressed */ + break; case 0x85: /* Unknown */ case 0x8d: /* Unknown */ - case 0x8e: /* Unknown */ case 0x94: /* Unknown */ case 0x95: /* Unknown */ default: diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index d70f793ce4b3..403525cc1783 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -443,7 +443,7 @@ of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args) if (IS_ERR(pwm)) return pwm; - if (args->args_count > 1) + if (args->args_count > 0) pwm->args.period = args->args[0]; pwm->args.polarity = PWM_POLARITY_NORMAL; diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c index 043736972cb9..c8425493b95d 100644 --- a/drivers/pwm/pwm-dwc-core.c +++ b/drivers/pwm/pwm-dwc-core.c @@ -172,7 +172,6 @@ struct pwm_chip *dwc_pwm_alloc(struct device *dev) dwc->clk_ns = 10; chip->ops = &dwc_pwm_ops; - dev_set_drvdata(dev, chip); return chip; } EXPORT_SYMBOL_GPL(dwc_pwm_alloc); diff --git a/drivers/pwm/pwm-dwc.c b/drivers/pwm/pwm-dwc.c index 676eaf8d7a53..fb3eadf6fbc4 100644 --- a/drivers/pwm/pwm-dwc.c +++ b/drivers/pwm/pwm-dwc.c @@ -31,26 +31,34 @@ static const struct dwc_pwm_info ehl_pwm_info = { .size = 0x1000, }; -static int dwc_pwm_init_one(struct device *dev, void __iomem *base, unsigned int offset) +static int dwc_pwm_init_one(struct device *dev, struct dwc_pwm_drvdata *ddata, unsigned int idx) { struct pwm_chip *chip; struct dwc_pwm *dwc; + int ret; chip = dwc_pwm_alloc(dev); if (IS_ERR(chip)) return PTR_ERR(chip); dwc = to_dwc_pwm(chip); - dwc->base = base + offset; + dwc->base = ddata->io_base + (ddata->info->size * idx); - return devm_pwmchip_add(dev, chip); + ret = devm_pwmchip_add(dev, chip); + if (ret) + return ret; + + ddata->chips[idx] = chip; + return 0; } static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id) { const struct dwc_pwm_info *info; struct device *dev = &pci->dev; - int i, ret; + struct dwc_pwm_drvdata *ddata; + unsigned int idx; + int ret; ret = pcim_enable_device(pci); if (ret) @@ -63,17 +71,25 @@ static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id) return dev_err_probe(dev, ret, "Failed to iomap PCI BAR\n"); info = (const struct dwc_pwm_info *)id->driver_data; + ddata = devm_kzalloc(dev, struct_size(ddata, chips, info->nr), GFP_KERNEL); + if (!ddata) + return -ENOMEM; - for (i = 0; i < info->nr; i++) { - /* - * No need to check for pcim_iomap_table() failure, - * pcim_iomap_regions() already does it for us. - */ - ret = dwc_pwm_init_one(dev, pcim_iomap_table(pci)[0], i * info->size); + /* + * No need to check for pcim_iomap_table() failure, + * pcim_iomap_regions() already does it for us. + */ + ddata->io_base = pcim_iomap_table(pci)[0]; + ddata->info = info; + + for (idx = 0; idx < ddata->info->nr; idx++) { + ret = dwc_pwm_init_one(dev, ddata, idx); if (ret) return ret; } + dev_set_drvdata(dev, ddata); + pm_runtime_put(dev); pm_runtime_allow(dev); @@ -88,19 +104,24 @@ static void dwc_pwm_remove(struct pci_dev *pci) static int dwc_pwm_suspend(struct device *dev) { - struct pwm_chip *chip = dev_get_drvdata(dev); - struct dwc_pwm *dwc = to_dwc_pwm(chip); - int i; + struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev); + unsigned int idx; - for (i = 0; i < DWC_TIMERS_TOTAL; i++) { - if (chip->pwms[i].state.enabled) { - dev_err(dev, "PWM %u in use by consumer (%s)\n", - i, chip->pwms[i].label); - return -EBUSY; + for (idx = 0; idx < ddata->info->nr; idx++) { + struct pwm_chip *chip = ddata->chips[idx]; + struct dwc_pwm *dwc = to_dwc_pwm(chip); + unsigned int i; + + for (i = 0; i < DWC_TIMERS_TOTAL; i++) { + if (chip->pwms[i].state.enabled) { + dev_err(dev, "PWM %u in use by consumer (%s)\n", + i, chip->pwms[i].label); + return -EBUSY; + } + dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i)); + dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i)); + dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i)); } - dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i)); - dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i)); - dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i)); } return 0; @@ -108,14 +129,19 @@ static int dwc_pwm_suspend(struct device *dev) static int dwc_pwm_resume(struct device *dev) { - struct pwm_chip *chip = dev_get_drvdata(dev); - struct dwc_pwm *dwc = to_dwc_pwm(chip); - int i; + struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev); + unsigned int idx; - for (i = 0; i < DWC_TIMERS_TOTAL; i++) { - dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i)); - dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i)); - dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i)); + for (idx = 0; idx < ddata->info->nr; idx++) { + struct pwm_chip *chip = ddata->chips[idx]; + struct dwc_pwm *dwc = to_dwc_pwm(chip); + unsigned int i; + + for (i = 0; i < DWC_TIMERS_TOTAL; i++) { + dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i)); + dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i)); + dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i)); + } } return 0; diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h index a8b074841ae8..c6e2df5a6122 100644 --- a/drivers/pwm/pwm-dwc.h +++ b/drivers/pwm/pwm-dwc.h @@ -38,6 +38,12 @@ struct dwc_pwm_info { unsigned int size; }; +struct dwc_pwm_drvdata { + const struct dwc_pwm_info *info; + void __iomem *io_base; + struct pwm_chip *chips[]; +}; + struct dwc_pwm_ctx { u32 cnt; u32 cnt2; diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index d79a96679a26..d6596583ed4e 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -284,9 +284,9 @@ static int img_pwm_probe(struct platform_device *pdev) return PTR_ERR(imgchip->sys_clk); } - imgchip->pwm_clk = devm_clk_get(&pdev->dev, "imgchip"); + imgchip->pwm_clk = devm_clk_get(&pdev->dev, "pwm"); if (IS_ERR(imgchip->pwm_clk)) { - dev_err(&pdev->dev, "failed to get imgchip clock\n"); + dev_err(&pdev->dev, "failed to get pwm clock\n"); return PTR_ERR(imgchip->pwm_clk); } diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 2f4ac9591c8f..271dfad05d68 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -150,6 +150,8 @@ static unsigned int max_nr_fru; /* Total length of record including headers and list of descriptor entries. */ static size_t max_rec_len; +#define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255)) + /* Total number of SPA entries across all FRUs. */ static unsigned int spa_nr_entries; @@ -475,6 +477,16 @@ static void set_rec_fields(struct fru_rec *rec) struct cper_section_descriptor *sec_desc = &rec->sec_desc; struct cper_record_header *hdr = &rec->hdr; + /* + * This is a saved record created with fewer max_nr_entries. + * Update the record lengths and keep everything else as-is. + */ + if (hdr->record_length && hdr->record_length < max_rec_len) { + pr_debug("Growing record 0x%016llx from %u to %zu bytes\n", + hdr->record_id, hdr->record_length, max_rec_len); + goto update_lengths; + } + memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE); hdr->revision = CPER_RECORD_REV; hdr->signature_end = CPER_SIG_END; @@ -489,19 +501,21 @@ static void set_rec_fields(struct fru_rec *rec) hdr->error_severity = CPER_SEV_RECOVERABLE; hdr->validation_bits = 0; - hdr->record_length = max_rec_len; hdr->creator_id = CPER_CREATOR_FMP; hdr->notification_type = CPER_NOTIFY_MCE; hdr->record_id = cper_next_record_id(); hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR; sec_desc->section_offset = sizeof(struct cper_record_header); - sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); sec_desc->revision = CPER_SEC_REV; sec_desc->validation_bits = 0; sec_desc->flags = CPER_SEC_PRIMARY; sec_desc->section_type = CPER_SECTION_TYPE_FMP; sec_desc->section_severity = CPER_SEV_RECOVERABLE; + +update_lengths: + hdr->record_length = max_rec_len; + sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); } static int save_new_records(void) @@ -512,16 +526,18 @@ static int save_new_records(void) int ret = 0; for_each_fru(i, rec) { - if (rec->hdr.record_length) + /* No need to update saved records that match the current record size. */ + if (rec->hdr.record_length == max_rec_len) continue; + if (!rec->hdr.record_length) + set_bit(i, new_records); + set_rec_fields(rec); ret = update_record_on_storage(rec); if (ret) goto out_clear; - - set_bit(i, new_records); } return ret; @@ -641,12 +657,7 @@ static int get_saved_records(void) int ret, pos; ssize_t len; - /* - * Assume saved records match current max size. - * - * However, this may not be true depending on module parameters. - */ - old = kmalloc(max_rec_len, GFP_KERNEL); + old = kmalloc(FMPM_MAX_REC_LEN, GFP_KERNEL); if (!old) { ret = -ENOMEM; goto out; @@ -663,21 +674,31 @@ static int get_saved_records(void) * Make sure to clear temporary buffer between reads to avoid * leftover data from records of various sizes. */ - memset(old, 0, max_rec_len); + memset(old, 0, FMPM_MAX_REC_LEN); - len = erst_read_record(record_id, &old->hdr, max_rec_len, + len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN, sizeof(struct fru_rec), &CPER_CREATOR_FMP); if (len < 0) continue; - if (len > max_rec_len) { - pr_debug("Found record larger than max_rec_len\n"); + new = get_valid_record(old); + if (!new) { + erst_clear(record_id); continue; } - new = get_valid_record(old); - if (!new) - erst_clear(record_id); + if (len > max_rec_len) { + unsigned int saved_nr_entries; + + saved_nr_entries = len - sizeof(struct fru_rec); + saved_nr_entries /= sizeof(struct cper_fru_poison_desc); + + pr_warn("Saved record found with %u entries.\n", saved_nr_entries); + pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries); + + ret = -EINVAL; + goto out_end; + } /* Restore the record */ memcpy(new, old, len); diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h index 4749ccdeeba1..5a2f48439258 100644 --- a/drivers/ras/debugfs.h +++ b/drivers/ras/debugfs.h @@ -4,6 +4,10 @@ #include +#if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *ras_get_debugfs_root(void); +#else +static inline struct dentry *ras_get_debugfs_root(void) { return NULL; } +#endif /* DEBUG_FS */ #endif /* __RAS_DEBUGFS_H__ */ diff --git a/drivers/regulator/tps65132-regulator.c b/drivers/regulator/tps65132-regulator.c index a06f5f2d7932..9c2f0dd42613 100644 --- a/drivers/regulator/tps65132-regulator.c +++ b/drivers/regulator/tps65132-regulator.c @@ -267,10 +267,17 @@ static const struct i2c_device_id tps65132_id[] = { }; MODULE_DEVICE_TABLE(i2c, tps65132_id); +static const struct of_device_id __maybe_unused tps65132_of_match[] = { + { .compatible = "ti,tps65132" }, + {}, +}; +MODULE_DEVICE_TABLE(of, tps65132_of_match); + static struct i2c_driver tps65132_i2c_driver = { .driver = { .name = "tps65132", .probe_type = PROBE_PREFER_ASYNCHRONOUS, + .of_match_table = of_match_ptr(tps65132_of_match), }, .probe = tps65132_probe, .id_table = tps65132_id, diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index f95d12345d98..920f550bc313 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -363,10 +363,8 @@ int ccw_device_set_online(struct ccw_device *cdev) spin_lock_irq(cdev->ccwlock); ret = ccw_device_online(cdev); - spin_unlock_irq(cdev->ccwlock); - if (ret == 0) - wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); - else { + if (ret) { + spin_unlock_irq(cdev->ccwlock); CIO_MSG_EVENT(0, "ccw_device_online returned %d, " "device 0.%x.%04x\n", ret, cdev->private->dev_id.ssid, @@ -375,7 +373,12 @@ int ccw_device_set_online(struct ccw_device *cdev) put_device(&cdev->dev); return ret; } - spin_lock_irq(cdev->ccwlock); + /* Wait until a final state is reached */ + while (!dev_fsm_final_state(cdev)) { + spin_unlock_irq(cdev->ccwlock); + wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); + spin_lock_irq(cdev->ccwlock); + } /* Check if online processing was successful */ if ((cdev->private->state != DEV_STATE_ONLINE) && (cdev->private->state != DEV_STATE_W4SENSE)) { diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 65d8b2cfd626..42791fa0b80e 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -504,6 +504,11 @@ callback: ccw_device_done(cdev, DEV_STATE_ONLINE); /* Deliver fake irb to device driver, if needed. */ if (cdev->private->flags.fake_irb) { + CIO_MSG_EVENT(2, "fakeirb: deliver device 0.%x.%04x intparm %lx type=%d\n", + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, + cdev->private->intparm, + cdev->private->flags.fake_irb); create_fake_irb(&cdev->private->dma_area->irb, cdev->private->flags.fake_irb); cdev->private->flags.fake_irb = 0; diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 40c97f873075..acd6790dba4d 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -208,6 +208,10 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_CMD_IRB; cdev->private->intparm = intparm; + CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n", + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, intparm, + cdev->private->flags.fake_irb); return 0; } else /* There's already a fake I/O around. */ @@ -551,6 +555,10 @@ int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw, if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_TM_IRB; cdev->private->intparm = intparm; + CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n", + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, intparm, + cdev->private->flags.fake_irb); return 0; } else /* There's already a fake I/O around. */ diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 3d9f0834c78b..a1cb39f4b7a2 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -722,8 +722,8 @@ static void qdio_handle_activate_check(struct qdio_irq *irq_ptr, lgr_info_log(); } -static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, - int dstat) +static int qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, + int dstat, int dcc) { DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq"); @@ -731,15 +731,18 @@ static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, goto error; if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END)) goto error; + if (dcc == 1) + return -EAGAIN; if (!(dstat & DEV_STAT_DEV_END)) goto error; qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED); - return; + return 0; error: DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no); DBF_ERROR("ds: %2x cs:%2x", dstat, cstat); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + return -EIO; } /* qdio interrupt handler */ @@ -748,7 +751,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, { struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; - int cstat, dstat; + int cstat, dstat, rc, dcc; if (!intparm || !irq_ptr) { ccw_device_get_schid(cdev, &schid); @@ -768,10 +771,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, qdio_irq_check_sense(irq_ptr, irb); cstat = irb->scsw.cmd.cstat; dstat = irb->scsw.cmd.dstat; + dcc = scsw_cmd_is_valid_cc(&irb->scsw) ? irb->scsw.cmd.cc : 0; + rc = 0; switch (irq_ptr->state) { case QDIO_IRQ_STATE_INACTIVE: - qdio_establish_handle_irq(irq_ptr, cstat, dstat); + rc = qdio_establish_handle_irq(irq_ptr, cstat, dstat, dcc); break; case QDIO_IRQ_STATE_CLEANUP: qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); @@ -785,12 +790,25 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, if (cstat || dstat) qdio_handle_activate_check(irq_ptr, intparm, cstat, dstat); + else if (dcc == 1) + rc = -EAGAIN; break; case QDIO_IRQ_STATE_STOPPED: break; default: WARN_ON_ONCE(1); } + + if (rc == -EAGAIN) { + DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qint retry"); + rc = ccw_device_start(cdev, irq_ptr->ccw, intparm, 0, 0); + if (!rc) + return; + DBF_ERROR("%4x RETRY ERR", irq_ptr->schid.sch_no); + DBF_ERROR("rc:%4x", rc); + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + } + wake_up(&cdev->private->wait_q); } diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 2c8e964425dc..43778b088ffa 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -292,13 +292,16 @@ out: static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb) { clear_bit(dmb->sba_idx, ism->sba_bitmap); - dma_free_coherent(&ism->pdev->dev, dmb->dmb_len, - dmb->cpu_addr, dmb->dma_addr); + dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len, + DMA_FROM_DEVICE); + folio_put(virt_to_folio(dmb->cpu_addr)); } static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb) { + struct folio *folio; unsigned long bit; + int rc; if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev)) return -EINVAL; @@ -315,14 +318,30 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb) test_and_set_bit(dmb->sba_idx, ism->sba_bitmap)) return -EINVAL; - dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len, - &dmb->dma_addr, - GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | __GFP_NORETRY); - if (!dmb->cpu_addr) - clear_bit(dmb->sba_idx, ism->sba_bitmap); + folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC | + __GFP_NORETRY, get_order(dmb->dmb_len)); - return dmb->cpu_addr ? 0 : -ENOMEM; + if (!folio) { + rc = -ENOMEM; + goto out_bit; + } + + dmb->cpu_addr = folio_address(folio); + dmb->dma_addr = dma_map_page(&ism->pdev->dev, + virt_to_page(dmb->cpu_addr), 0, + dmb->dmb_len, DMA_FROM_DEVICE); + if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) { + rc = -ENOMEM; + goto out_free; + } + + return 0; + +out_free: + kfree(dmb->cpu_addr); +out_bit: + clear_bit(dmb->sba_idx, ism->sba_bitmap); + return rc; } int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a0cce6872075..f0b8b709649f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, } } +/** + * qeth_irq() - qeth interrupt handler + * @cdev: ccw device + * @intparm: expect pointer to iob + * @irb: Interruption Response Block + * + * In the good path: + * corresponding qeth channel is locked with last used iob as active_cmd. + * But this function is also called for error interrupts. + * + * Caller ensures that: + * Interrupts are disabled; ccw device lock is held; + * + */ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { @@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, iob = (struct qeth_cmd_buffer *) (addr_t)intparm; } - qeth_unlock_channel(card, channel); - rc = qeth_check_irb_error(card, cdev, irb); if (rc) { /* IO was terminated, free its resources. */ + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); return; @@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); @@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } } + if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) { + /* channel command hasn't started: retry. + * active_cmd is still set to last iob + */ + QETH_CARD_TEXT(card, 2, "irqcc1"); + rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob), + (addr_t)iob, 0, 0, iob->timeout); + if (rc) { + QETH_DBF_MESSAGE(2, + "ccw retry on %x failed, rc = %i\n", + CARD_DEVID(card), rc); + QETH_CARD_TEXT_(card, 2, " err%d", rc); + qeth_unlock_channel(card, channel); + qeth_cancel_cmd(iob, rc); + } + return; + } + + qeth_unlock_channel(card, channel); + if (iob) { /* sanity check: */ if (irb->scsw.cmd.count > iob->length) { diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index 2c246e80c1c4..d91659811eb3 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -833,7 +833,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba, BNX2FC_TGT_DBG(tgt, "Freeing up session resources\n"); - spin_lock_bh(&tgt->cq_lock); ctx_base_ptr = tgt->ctx_base; tgt->ctx_base = NULL; @@ -889,7 +888,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba, tgt->sq, tgt->sq_dma); tgt->sq = NULL; } - spin_unlock_bh(&tgt->cq_lock); if (ctx_base_ptr) iounmap(ctx_base_ptr); diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index 1befcd5b2a0f..fa07a6f54003 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -102,7 +102,9 @@ do { \ #define MAX_RETRIES 1 -static struct class * ch_sysfs_class; +static const struct class ch_sysfs_class = { + .name = "scsi_changer", +}; typedef struct { struct kref ref; @@ -930,7 +932,7 @@ static int ch_probe(struct device *dev) mutex_init(&ch->lock); kref_init(&ch->ref); ch->device = sd; - class_dev = device_create(ch_sysfs_class, dev, + class_dev = device_create(&ch_sysfs_class, dev, MKDEV(SCSI_CHANGER_MAJOR, ch->minor), ch, "s%s", ch->name); if (IS_ERR(class_dev)) { @@ -955,7 +957,7 @@ static int ch_probe(struct device *dev) return 0; destroy_dev: - device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor)); + device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor)); put_device: scsi_device_put(sd); remove_idr: @@ -974,7 +976,7 @@ static int ch_remove(struct device *dev) dev_set_drvdata(dev, NULL); spin_unlock(&ch_index_lock); - device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR,ch->minor)); + device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor)); scsi_device_put(ch->device); kref_put(&ch->ref, ch_destroy); return 0; @@ -1003,11 +1005,9 @@ static int __init init_ch_module(void) int rc; printk(KERN_INFO "SCSI Media Changer driver v" VERSION " \n"); - ch_sysfs_class = class_create("scsi_changer"); - if (IS_ERR(ch_sysfs_class)) { - rc = PTR_ERR(ch_sysfs_class); + rc = class_register(&ch_sysfs_class); + if (rc) return rc; - } rc = register_chrdev(SCSI_CHANGER_MAJOR,"ch",&changer_fops); if (rc < 0) { printk("Unable to get major %d for SCSI-Changer\n", @@ -1022,7 +1022,7 @@ static int __init init_ch_module(void) fail2: unregister_chrdev(SCSI_CHANGER_MAJOR, "ch"); fail1: - class_destroy(ch_sysfs_class); + class_unregister(&ch_sysfs_class); return rc; } @@ -1030,7 +1030,7 @@ static void __exit exit_ch_module(void) { scsi_unregister_driver(&ch_template.gendrv); unregister_chrdev(SCSI_CHANGER_MAJOR, "ch"); - class_destroy(ch_sysfs_class); + class_unregister(&ch_sysfs_class); idr_destroy(&ch_index_idr); } diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index debd36974119..e8382cc5cf23 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -28,7 +28,12 @@ MODULE_AUTHOR("Manoj N. Kumar "); MODULE_AUTHOR("Matthew R. Ochs "); MODULE_LICENSE("GPL"); -static struct class *cxlflash_class; +static char *cxlflash_devnode(const struct device *dev, umode_t *mode); +static const struct class cxlflash_class = { + .name = "cxlflash", + .devnode = cxlflash_devnode, +}; + static u32 cxlflash_major; static DECLARE_BITMAP(cxlflash_minor, CXLFLASH_MAX_ADAPTERS); @@ -3602,7 +3607,7 @@ static int init_chrdev(struct cxlflash_cfg *cfg) goto err1; } - char_dev = device_create(cxlflash_class, NULL, devno, + char_dev = device_create(&cxlflash_class, NULL, devno, NULL, "cxlflash%d", minor); if (IS_ERR(char_dev)) { rc = PTR_ERR(char_dev); @@ -3880,14 +3885,12 @@ static int cxlflash_class_init(void) cxlflash_major = MAJOR(devno); - cxlflash_class = class_create("cxlflash"); - if (IS_ERR(cxlflash_class)) { - rc = PTR_ERR(cxlflash_class); + rc = class_register(&cxlflash_class); + if (rc) { pr_err("%s: class_create failed rc=%d\n", __func__, rc); goto err; } - cxlflash_class->devnode = cxlflash_devnode; out: pr_debug("%s: returning rc=%d\n", __func__, rc); return rc; @@ -3903,7 +3906,7 @@ static void cxlflash_class_exit(void) { dev_t devno = MKDEV(cxlflash_major, 0); - class_destroy(cxlflash_class); + class_unregister(&cxlflash_class); unregister_chrdev_region(devno, CXLFLASH_MAX_ADAPTERS); } diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 097dfe4b620d..35f8e00850d6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1797,7 +1797,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device) if (dev_is_sata(device)) { struct ata_link *link = &device->sata_dev.ap->link; - rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT, + rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT, smp_ata_check_ready_type); } else { msleep(2000); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 7d2a33514538..34f96cc35342 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2244,7 +2244,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) && (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) { - ts->stat = SAS_PROTO_RESPONSE; + if (task->ata_task.use_ncq) { + struct domain_device *device = task->dev; + struct hisi_sas_device *sas_dev = device->lldd_dev; + + sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR; + slot->abort = 1; + } else { + ts->stat = SAS_PROTO_RESPONSE; + } } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) { ts->residual = trans_tx_fail_type; ts->stat = SAS_DATA_UNDERRUN; diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 4f495a41ec4a..2d92549e5243 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -353,12 +353,13 @@ static void scsi_host_dev_release(struct device *dev) if (shost->shost_state == SHOST_CREATED) { /* - * Free the shost_dev device name here if scsi_host_alloc() - * and scsi_host_put() have been called but neither + * Free the shost_dev device name and remove the proc host dir + * here if scsi_host_{alloc,put}() have been called but neither * scsi_host_add() nor scsi_remove_host() has been called. * This avoids that the memory allocated for the shost_dev - * name is leaked. + * name as well as the proc dir structure are leaked. */ + scsi_proc_hostdir_rm(shost->hostt); kfree(dev_name(&shost->shost_dev)); } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index a2204674b680..f6e6db8b8aba 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -135,7 +135,7 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size, static inline void *alloc_smp_req(int size) { - u8 *p = kzalloc(size, GFP_KERNEL); + u8 *p = kzalloc(ALIGN(size, ARCH_DMA_MINALIGN), GFP_KERNEL); if (p) p[0] = SMP_REQUEST; return p; @@ -1621,6 +1621,16 @@ out_err: /* ---------- Domain revalidation ---------- */ +static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp, + u8 *sas_addr, + enum sas_device_type *type) +{ + memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE); + *type = to_dev_type(&disc_resp->disc); + if (*type == SAS_PHY_UNUSED) + memset(sas_addr, 0, SAS_ADDR_SIZE); +} + static int sas_get_phy_discover(struct domain_device *dev, int phy_id, struct smp_disc_resp *disc_resp) { @@ -1674,13 +1684,8 @@ int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, return -ENOMEM; res = sas_get_phy_discover(dev, phy_id, disc_resp); - if (res == 0) { - memcpy(sas_addr, disc_resp->disc.attached_sas_addr, - SAS_ADDR_SIZE); - *type = to_dev_type(&disc_resp->disc); - if (*type == 0) - memset(sas_addr, 0, SAS_ADDR_SIZE); - } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type); kfree(disc_resp); return res; } @@ -1940,6 +1945,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; enum sas_device_type type = SAS_PHY_UNUSED; + struct smp_disc_resp *disc_resp; u8 sas_addr[SAS_ADDR_SIZE]; char msg[80] = ""; int res; @@ -1951,33 +1957,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(dev->sas_addr), phy_id, msg); memset(sas_addr, 0, SAS_ADDR_SIZE); - res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type); + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); + if (!disc_resp) + return -ENOMEM; + + res = sas_get_phy_discover(dev, phy_id, disc_resp); switch (res) { case SMP_RESP_NO_PHY: phy->phy_state = PHY_NOT_PRESENT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_PHY_VACANT: phy->phy_state = PHY_VACANT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_FUNC_ACC: break; case -ECOMM: break; default: - return res; + goto out_free_resp; } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type); + if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) { phy->phy_state = PHY_EMPTY; sas_unregister_devs_sas_addr(dev, phy_id, last); /* - * Even though the PHY is empty, for convenience we discover - * the PHY to update the PHY info, like negotiated linkrate. + * Even though the PHY is empty, for convenience we update + * the PHY info, like negotiated linkrate. */ - sas_ex_phy_discover(dev, phy_id); - return res; + if (res == 0) + sas_set_ex_phy(dev, phy_id, disc_resp); + goto out_free_resp; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id); @@ -1989,7 +2003,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, action = ", needs recovery"; pr_debug("ex %016llx phy%02d broadcast flutter%s\n", SAS_ADDR(dev->sas_addr), phy_id, action); - return res; + goto out_free_resp; } /* we always have to delete the old device when we went here */ @@ -1998,7 +2012,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(phy->attached_sas_addr)); sas_unregister_devs_sas_addr(dev, phy_id, last); - return sas_discover_new(dev, phy_id); + res = sas_discover_new(dev, phy_id); +out_free_resp: + kfree(disc_resp); + return res; } /** diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 30d20d37554f..98ca7df003ef 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1333,7 +1333,6 @@ struct lpfc_hba { struct timer_list fabric_block_timer; unsigned long bit_flags; atomic_t num_rsrc_err; - atomic_t num_cmd_success; unsigned long last_rsrc_error_time; unsigned long last_ramp_down_time; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -1438,6 +1437,7 @@ struct lpfc_hba { struct timer_list inactive_vmid_poll; /* RAS Support */ + spinlock_t ras_fwlog_lock; /* do not take while holding another lock */ struct lpfc_ras_fwlog ras_fwlog; uint32_t iocb_cnt; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 365c7e96070b..3c534b3cfe91 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5865,9 +5865,9 @@ lpfc_ras_fwlog_buffsize_set(struct lpfc_hba *phba, uint val) if (phba->cfg_ras_fwlog_func != PCI_FUNC(phba->pcidev->devfn)) return -EINVAL; - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); state = phba->ras_fwlog.state; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); if (state == REG_INPROGRESS) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6147 RAS Logging " diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index d80e6e81053b..529df1768fa8 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2513,7 +2513,7 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi) return -ENOMEM; } - dmabuff = (struct lpfc_dmabuf *)mbox->ctx_buf; + dmabuff = mbox->ctx_buf; mbox->ctx_buf = NULL; mbox->ctx_ndlp = NULL; status = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO); @@ -3169,10 +3169,10 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) } cmdwqe = &cmdiocbq->wqe; - memset(cmdwqe, 0, sizeof(union lpfc_wqe)); + memset(cmdwqe, 0, sizeof(*cmdwqe)); if (phba->sli_rev < LPFC_SLI_REV4) { rspwqe = &rspiocbq->wqe; - memset(rspwqe, 0, sizeof(union lpfc_wqe)); + memset(rspwqe, 0, sizeof(*rspwqe)); } INIT_LIST_HEAD(&head); @@ -3376,7 +3376,7 @@ lpfc_bsg_issue_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) unsigned long flags; uint8_t *pmb, *pmb_buf; - dd_data = pmboxq->ctx_ndlp; + dd_data = pmboxq->ctx_u.dd_data; /* * The outgoing buffer is readily referred from the dma buffer, @@ -3553,7 +3553,7 @@ lpfc_bsg_issue_mbox_ext_handle_job(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) struct lpfc_sli_config_mbox *sli_cfg_mbx; uint8_t *pmbx; - dd_data = pmboxq->ctx_buf; + dd_data = pmboxq->ctx_u.dd_data; /* Determine if job has been aborted */ spin_lock_irqsave(&phba->ct_ev_lock, flags); @@ -3940,7 +3940,7 @@ lpfc_bsg_sli_cfg_read_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_read_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->ctx_buf = dd_data; + pmboxq->ctx_u.dd_data = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -4112,7 +4112,7 @@ lpfc_bsg_sli_cfg_write_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->ctx_buf = dd_data; + pmboxq->ctx_u.dd_data = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -4460,7 +4460,7 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->ctx_buf = dd_data; + pmboxq->ctx_u.dd_data = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -4747,7 +4747,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job, if (mbox_req->inExtWLen || mbox_req->outExtWLen) { from = pmbx; ext = from + sizeof(MAILBOX_t); - pmboxq->ctx_buf = ext; + pmboxq->ext_buf = ext; pmboxq->in_ext_byte_len = mbox_req->inExtWLen * sizeof(uint32_t); pmboxq->out_ext_byte_len = @@ -4875,7 +4875,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_mbox_cmpl; /* setup context field to pass wait_queue pointer to wake function */ - pmboxq->ctx_ndlp = dd_data; + pmboxq->ctx_u.dd_data = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -5070,12 +5070,12 @@ lpfc_bsg_get_ras_config(struct bsg_job *job) bsg_reply->reply_data.vendor_reply.vendor_rsp; /* Current logging state */ - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (ras_fwlog->state == ACTIVE) ras_reply->state = LPFC_RASLOG_STATE_RUNNING; else ras_reply->state = LPFC_RASLOG_STATE_STOPPED; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); ras_reply->log_level = phba->ras_fwlog.fw_loglevel; ras_reply->log_buff_sz = phba->cfg_ras_fwlog_buffsize; @@ -5132,13 +5132,13 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) if (action == LPFC_RASACTION_STOP_LOGGING) { /* Check if already disabled */ - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (ras_fwlog->state != ACTIVE) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); rc = -ESRCH; goto ras_job_error; } - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); /* Disable logging */ lpfc_ras_stop_fwlog(phba); @@ -5149,10 +5149,10 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) * FW-logging with new log-level. Return status * "Logging already Running" to caller. **/ - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (ras_fwlog->state != INACTIVE) action_status = -EINPROGRESS; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); /* Enable logging */ rc = lpfc_sli4_ras_fwlog_init(phba, log_level, @@ -5268,13 +5268,13 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job) goto ras_job_error; /* Logging to be stopped before reading */ - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (ras_fwlog->state == ACTIVE) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); rc = -EINPROGRESS; goto ras_job_error; } - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); if (job->request_len < sizeof(struct fc_bsg_request) + diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index ab5af10c8a16..a2d2b02b3418 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2194,12 +2194,12 @@ static int lpfc_debugfs_ras_log_data(struct lpfc_hba *phba, memset(buffer, 0, size); - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (phba->ras_fwlog.state != ACTIVE) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); return -EINVAL; } - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); list_for_each_entry_safe(dmabuf, next, &phba->ras_fwlog.fwlog_buff_list, list) { @@ -2250,13 +2250,13 @@ lpfc_debugfs_ras_log_open(struct inode *inode, struct file *file) int size; int rc = -ENOMEM; - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (phba->ras_fwlog.state != ACTIVE) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); rc = -EINVAL; goto out; } - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); if (check_mul_overflow(LPFC_RAS_MIN_BUFF_POST_SIZE, phba->cfg_ras_fwlog_buffsize, &size)) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 28e56542e072..f7c28dc73bf6 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4437,23 +4437,23 @@ lpfc_els_retry_delay(struct timer_list *t) unsigned long flags; struct lpfc_work_evt *evtp = &ndlp->els_retry_evt; + /* Hold a node reference for outstanding queued work */ + if (!lpfc_nlp_get(ndlp)) + return; + spin_lock_irqsave(&phba->hbalock, flags); if (!list_empty(&evtp->evt_listp)) { spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_nlp_put(ndlp); return; } - /* We need to hold the node by incrementing the reference - * count until the queued work is done - */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); - if (evtp->evt_arg1) { - evtp->evt = LPFC_EVT_ELS_RETRY; - list_add_tail(&evtp->evt_listp, &phba->work_list); - lpfc_worker_wake_up(phba); - } + evtp->evt_arg1 = ndlp; + evtp->evt = LPFC_EVT_ELS_RETRY; + list_add_tail(&evtp->evt_listp, &phba->work_list); spin_unlock_irqrestore(&phba->hbalock, flags); - return; + + lpfc_worker_wake_up(phba); } /** @@ -7238,7 +7238,7 @@ lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context) goto rdp_fail; mbox->vport = rdp_context->ndlp->vport; mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a0; - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; + mbox->ctx_u.rdp = rdp_context; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED); @@ -7290,7 +7290,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->in_ext_byte_len = DMP_SFF_PAGE_A0_SIZE; mbox->out_ext_byte_len = DMP_SFF_PAGE_A0_SIZE; mbox->mbox_offset_word = 5; - mbox->ctx_buf = virt; + mbox->ext_buf = virt; } else { bf_set(lpfc_mbx_memory_dump_type3_length, &mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A0_SIZE); @@ -7298,7 +7298,6 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); } mbox->vport = phba->pport; - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30); if (rc == MBX_NOT_FINISHED) { @@ -7307,7 +7306,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, } if (phba->sli_rev == LPFC_SLI_REV4) - mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); + mp = mbox->ctx_buf; else mp = mpsave; @@ -7350,7 +7349,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->in_ext_byte_len = DMP_SFF_PAGE_A2_SIZE; mbox->out_ext_byte_len = DMP_SFF_PAGE_A2_SIZE; mbox->mbox_offset_word = 5; - mbox->ctx_buf = virt; + mbox->ext_buf = virt; } else { bf_set(lpfc_mbx_memory_dump_type3_length, &mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A2_SIZE); @@ -7358,7 +7357,6 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); } - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30); if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) { rc = 1; @@ -7500,9 +7498,9 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) int rc; mb = &pmb->u.mb; - lcb_context = (struct lpfc_lcb_context *)pmb->ctx_ndlp; + lcb_context = pmb->ctx_u.lcb; ndlp = lcb_context->ndlp; - pmb->ctx_ndlp = NULL; + memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u)); pmb->ctx_buf = NULL; shdr = (union lpfc_sli4_cfg_shdr *) @@ -7642,7 +7640,7 @@ lpfc_sli4_set_beacon(struct lpfc_vport *vport, lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON, LPFC_MBOX_OPCODE_SET_BEACON_CONFIG, len, LPFC_SLI4_MBX_EMBED); - mbox->ctx_ndlp = (void *)lcb_context; + mbox->ctx_u.lcb = lcb_context; mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_els_lcb_rsp; bf_set(lpfc_mbx_set_beacon_port_num, &mbox->u.mqe.un.beacon_config, @@ -8639,9 +8637,9 @@ lpfc_els_rsp_rls_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) mb = &pmb->u.mb; ndlp = pmb->ctx_ndlp; - rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff); - oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff); - pmb->ctx_buf = NULL; + rxid = (uint16_t)(pmb->ctx_u.ox_rx_id & 0xffff); + oxid = (uint16_t)((pmb->ctx_u.ox_rx_id >> 16) & 0xffff); + memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u)); pmb->ctx_ndlp = NULL; if (mb->mbxStatus) { @@ -8745,8 +8743,7 @@ lpfc_els_rcv_rls(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC); if (mbox) { lpfc_read_lnk_stat(phba, mbox); - mbox->ctx_buf = (void *)((unsigned long) - (ox_id << 16 | ctx)); + mbox->ctx_u.ox_rx_id = ox_id << 16 | ctx; mbox->ctx_ndlp = lpfc_nlp_get(ndlp); if (!mbox->ctx_ndlp) goto node_err; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index a7a2309a629f..e42fa9c822b5 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -257,7 +257,9 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) if (evtp->evt_arg1) { evtp->evt = LPFC_EVT_DEV_LOSS; list_add_tail(&evtp->evt_listp, &phba->work_list); + spin_unlock_irqrestore(&phba->hbalock, iflags); lpfc_worker_wake_up(phba); + return; } spin_unlock_irqrestore(&phba->hbalock, iflags); } else { @@ -275,10 +277,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); } - } - - return; } /** @@ -3429,7 +3428,7 @@ static void lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + struct lpfc_dmabuf *mp = pmb->ctx_buf; struct lpfc_vport *vport = pmb->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct serv_parm *sp = &vport->fc_sparam; @@ -3737,7 +3736,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_mbx_read_top *la; struct lpfc_sli_ring *pring; MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); + struct lpfc_dmabuf *mp = pmb->ctx_buf; uint8_t attn_type; /* Unblock ELS traffic */ @@ -3851,8 +3850,8 @@ void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_dmabuf *mp = pmb->ctx_buf; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; /* The driver calls the state machine with the pmb pointer * but wants to make sure a stale ctx_buf isn't acted on. @@ -4066,7 +4065,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba) * the dump routine is a single-use construct. */ if (pmb->ctx_buf) { - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); pmb->ctx_buf = NULL; @@ -4089,7 +4088,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba) if (phba->sli_rev == LPFC_SLI_REV4) { byte_count = pmb->u.mqe.un.mb_words[5]; - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; if (byte_count > sizeof(struct static_vport_info) - offset) byte_count = sizeof(struct static_vport_info) @@ -4169,7 +4168,7 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; pmb->ctx_ndlp = NULL; @@ -4307,7 +4306,7 @@ void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; struct lpfc_vport *vport = pmb->vport; int rc; @@ -4431,7 +4430,7 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; pmb->ctx_ndlp = NULL; if (mb->mbxStatus) { @@ -5174,7 +5173,7 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_vport *vport = pmb->vport; struct lpfc_nodelist *ndlp; - ndlp = (struct lpfc_nodelist *)(pmb->ctx_ndlp); + ndlp = pmb->ctx_ndlp; if (!ndlp) return; lpfc_issue_els_logo(vport, ndlp, 0); @@ -5496,7 +5495,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if ((mb = phba->sli.mbox_active)) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && !(mb->mbox_flag & LPFC_MBX_IMED_UNREG) && - (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + (ndlp == mb->ctx_ndlp)) { mb->ctx_ndlp = NULL; mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; } @@ -5507,7 +5506,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) list_for_each_entry(mb, &phba->sli.mboxq_cmpl, list) { if ((mb->u.mb.mbxCommand != MBX_REG_LOGIN64) || (mb->mbox_flag & LPFC_MBX_IMED_UNREG) || - (ndlp != (struct lpfc_nodelist *)mb->ctx_ndlp)) + (ndlp != mb->ctx_ndlp)) continue; mb->ctx_ndlp = NULL; @@ -5517,7 +5516,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && !(mb->mbox_flag & LPFC_MBX_IMED_UNREG) && - (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + (ndlp == mb->ctx_ndlp)) { list_del(&mb->list); lpfc_mbox_rsrc_cleanup(phba, mb, MBOX_THD_LOCKED); @@ -6357,7 +6356,7 @@ void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; struct lpfc_vport *vport = pmb->vport; pmb->ctx_ndlp = NULL; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 88b2e57d90c2..f7a0aa3625f4 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -460,7 +460,7 @@ lpfc_config_port_post(struct lpfc_hba *phba) return -EIO; } - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; /* This dmabuf was allocated by lpfc_read_sparam. The dmabuf is no * longer needed. Prevent unintended ctx_buf access as the mbox is @@ -2217,7 +2217,7 @@ lpfc_handle_latt(struct lpfc_hba *phba) /* Cleanup any outstanding ELS commands */ lpfc_els_flush_all_cmd(phba); psli->slistat.link_event++; - lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf); + lpfc_read_topology(phba, pmb, pmb->ctx_buf); pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology; pmb->vport = vport; /* Block ELS IOCBs until we have processed this mbox command */ @@ -5454,7 +5454,7 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba, phba->sli.slistat.link_event++; /* Create lpfc_handle_latt mailbox command from link ACQE */ - lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf); + lpfc_read_topology(phba, pmb, pmb->ctx_buf); pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology; pmb->vport = phba->pport; @@ -6347,7 +6347,7 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) phba->sli.slistat.link_event++; /* Create lpfc_handle_latt mailbox command from link ACQE */ - lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf); + lpfc_read_topology(phba, pmb, pmb->ctx_buf); pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology; pmb->vport = phba->pport; @@ -7705,6 +7705,9 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) "NVME" : " "), (phba->nvmet_support ? "NVMET" : " ")); + /* ras_fwlog state */ + spin_lock_init(&phba->ras_fwlog_lock); + /* Initialize the IO buffer list used by driver for SLI3 SCSI */ spin_lock_init(&phba->scsi_buf_list_get_lock); INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get); @@ -13055,7 +13058,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) rc = request_threaded_irq(eqhdl->irq, &lpfc_sli4_hba_intr_handler, &lpfc_sli4_hba_intr_handler_th, - IRQF_ONESHOT, name, eqhdl); + 0, name, eqhdl); if (rc) { lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0486 MSI-X fast-path (%d) " diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index f7c41958036b..e98f1c2b2220 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -102,7 +102,7 @@ lpfc_mbox_rsrc_cleanup(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox, { struct lpfc_dmabuf *mp; - mp = (struct lpfc_dmabuf *)mbox->ctx_buf; + mp = mbox->ctx_buf; mbox->ctx_buf = NULL; /* Release the generic BPL buffer memory. */ @@ -204,10 +204,8 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset, uint16_t region_id) { MAILBOX_t *mb; - void *ctx; mb = &pmb->u.mb; - ctx = pmb->ctx_buf; /* Setup to dump VPD region */ memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); @@ -219,7 +217,6 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset, mb->un.varDmp.word_cnt = (DMP_RSP_SIZE / sizeof (uint32_t)); mb->un.varDmp.co = 0; mb->un.varDmp.resp_offset = 0; - pmb->ctx_buf = ctx; mb->mbxOwner = OWN_HOST; return; } @@ -236,11 +233,8 @@ void lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb; - void *ctx; mb = &pmb->u.mb; - /* Save context so that we can restore after memset */ - ctx = pmb->ctx_buf; /* Setup to dump VPD region */ memset(pmb, 0, sizeof(LPFC_MBOXQ_t)); @@ -254,7 +248,6 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) mb->un.varDmp.word_cnt = WAKE_UP_PARMS_WORD_SIZE; mb->un.varDmp.co = 0; mb->un.varDmp.resp_offset = 0; - pmb->ctx_buf = ctx; return; } @@ -372,7 +365,7 @@ lpfc_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, /* Save address for later completion and set the owner to host so that * the FW knows this mailbox is available for processing. */ - pmb->ctx_buf = (uint8_t *)mp; + pmb->ctx_buf = mp; mb->mbxOwner = OWN_HOST; return (0); } @@ -1816,7 +1809,7 @@ lpfc_sli4_mbox_cmd_free(struct lpfc_hba *phba, struct lpfcMboxq *mbox) } /* Reinitialize the context pointers to avoid stale usage. */ mbox->ctx_buf = NULL; - mbox->context3 = NULL; + memset(&mbox->ctx_u, 0, sizeof(mbox->ctx_u)); kfree(mbox->sge_array); /* Finally, free the mailbox command itself */ mempool_free(mbox, phba->mbox_mem_pool); @@ -2366,8 +2359,7 @@ lpfc_mbx_cmpl_rdp_link_stat(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) { MAILBOX_t *mb; int rc = FAILURE; - struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mboxq->ctx_ndlp); + struct lpfc_rdp_context *rdp_context = mboxq->ctx_u.rdp; mb = &mboxq->u.mb; if (mb->mbxStatus) @@ -2385,9 +2377,8 @@ mbx_failed: static void lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) { - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)mbox->ctx_buf; - struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mbox->ctx_ndlp); + struct lpfc_dmabuf *mp = mbox->ctx_buf; + struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp; if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) goto error_mbox_free; @@ -2401,7 +2392,7 @@ lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) /* Save the dma buffer for cleanup in the final completion. */ mbox->ctx_buf = mp; mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat; - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; + mbox->ctx_u.rdp = rdp_context; if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED) goto error_mbox_free; @@ -2416,9 +2407,8 @@ void lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) { int rc; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); - struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mbox->ctx_ndlp); + struct lpfc_dmabuf *mp = mbox->ctx_buf; + struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp; if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) goto error; @@ -2448,7 +2438,7 @@ lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a2; - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; + mbox->ctx_u.rdp = rdp_context; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) goto error; diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 8e425be7c7c9..c4172791c267 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -300,7 +300,7 @@ lpfc_defer_plogi_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *login_mbox) int rc; ndlp = login_mbox->ctx_ndlp; - save_iocb = login_mbox->context3; + save_iocb = login_mbox->ctx_u.save_iocb; if (mb->mbxStatus == MBX_SUCCESS) { /* Now that REG_RPI completed successfully, @@ -640,7 +640,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (!login_mbox->ctx_ndlp) goto out; - login_mbox->context3 = save_iocb; /* For PLOGI ACC */ + login_mbox->ctx_u.save_iocb = save_iocb; /* For PLOGI ACC */ spin_lock_irq(&ndlp->lock); ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI); @@ -682,8 +682,8 @@ lpfc_mbx_cmpl_resume_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) struct lpfc_nodelist *ndlp; uint32_t cmd; - elsiocb = (struct lpfc_iocbq *)mboxq->ctx_buf; - ndlp = (struct lpfc_nodelist *)mboxq->ctx_ndlp; + elsiocb = mboxq->ctx_u.save_iocb; + ndlp = mboxq->ctx_ndlp; vport = mboxq->vport; cmd = elsiocb->drvrTimeout; @@ -1875,7 +1875,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport, /* cleanup any ndlp on mbox q waiting for reglogin cmpl */ if ((mb = phba->sli.mbox_active)) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && - (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + (ndlp == mb->ctx_ndlp)) { ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; lpfc_nlp_put(ndlp); mb->ctx_ndlp = NULL; @@ -1886,7 +1886,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport, spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && - (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + (ndlp == mb->ctx_ndlp)) { ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; lpfc_nlp_put(ndlp); list_del(&mb->list); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 09c53b85bcb8..c5792eaf3f64 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2616,9 +2616,9 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) /* No concern about the role change on the nvme remoteport. * The transport will update it. */ - spin_lock_irq(&vport->phba->hbalock); + spin_lock_irq(&ndlp->lock); ndlp->fc4_xpt_flags |= NVME_XPT_UNREG_WAIT; - spin_unlock_irq(&vport->phba->hbalock); + spin_unlock_irq(&ndlp->lock); /* Don't let the host nvme transport keep sending keep-alives * on this remoteport. Vport is unloading, no recovery. The diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 8258b771bd00..561ced5503c6 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1586,7 +1586,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) wqe = &nvmewqe->wqe; /* Initialize WQE */ - memset(wqe, 0, sizeof(union lpfc_wqe)); + memset(wqe, 0, sizeof(*wqe)); ctx_buf->iocbq->cmd_dmabuf = NULL; spin_lock(&phba->sli4_hba.sgl_list_lock); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c0038eaae7b0..4a6e5223a224 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -167,11 +167,10 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) struct Scsi_Host *shost; struct scsi_device *sdev; unsigned long new_queue_depth; - unsigned long num_rsrc_err, num_cmd_success; + unsigned long num_rsrc_err; int i; num_rsrc_err = atomic_read(&phba->num_rsrc_err); - num_cmd_success = atomic_read(&phba->num_cmd_success); /* * The error and success command counters are global per @@ -186,20 +185,16 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { shost = lpfc_shost_from_vport(vports[i]); shost_for_each_device(sdev, shost) { - new_queue_depth = - sdev->queue_depth * num_rsrc_err / - (num_rsrc_err + num_cmd_success); - if (!new_queue_depth) - new_queue_depth = sdev->queue_depth - 1; + if (num_rsrc_err >= sdev->queue_depth) + new_queue_depth = 1; else new_queue_depth = sdev->queue_depth - - new_queue_depth; + num_rsrc_err; scsi_change_queue_depth(sdev, new_queue_depth); } } lpfc_destroy_vport_work_array(phba, vports); atomic_set(&phba->num_rsrc_err, 0); - atomic_set(&phba->num_cmd_success, 0); } /** @@ -5336,16 +5331,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) } err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd); } else { - if (vport->phba->cfg_enable_bg) { - lpfc_printf_vlog(vport, - KERN_INFO, LOG_SCSI_CMD, - "9038 BLKGRD: rcvd PROT_NORMAL cmd: " - "x%x reftag x%x cnt %u pt %x\n", - cmnd->cmnd[0], - scsi_prot_ref_tag(cmnd), - scsi_logical_block_count(cmnd), - (cmnd->cmnd[1]>>5)); - } err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd); } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1f8a9b5945cb..a028e008dd1e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1217,9 +1217,9 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, empty = list_empty(&phba->active_rrq_list); list_add_tail(&rrq->list, &phba->active_rrq_list); phba->hba_flag |= HBA_RRQ_ACTIVE; + spin_unlock_irqrestore(&phba->hbalock, iflags); if (empty) lpfc_worker_wake_up(phba); - spin_unlock_irqrestore(&phba->hbalock, iflags); return 0; out: spin_unlock_irqrestore(&phba->hbalock, iflags); @@ -2830,7 +2830,7 @@ lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) */ pmboxq->mbox_flag |= LPFC_MBX_WAKE; spin_lock_irqsave(&phba->hbalock, drvr_flag); - pmbox_done = (struct completion *)pmboxq->context3; + pmbox_done = pmboxq->ctx_u.mbox_wait; if (pmbox_done) complete(pmbox_done); spin_unlock_irqrestore(&phba->hbalock, drvr_flag); @@ -2885,7 +2885,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (!test_bit(FC_UNLOADING, &phba->pport->load_flag) && pmb->u.mb.mbxCommand == MBX_REG_LOGIN64 && !pmb->u.mb.mbxStatus) { - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; if (mp) { pmb->ctx_buf = NULL; lpfc_mbuf_free(phba, mp->virt, mp->phys); @@ -2914,12 +2914,12 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } if (pmb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + ndlp = pmb->ctx_ndlp; lpfc_nlp_put(ndlp); } if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) { - ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + ndlp = pmb->ctx_ndlp; /* Check to see if there are any deferred events to process */ if (ndlp) { @@ -2952,7 +2952,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) /* This nlp_put pairs with lpfc_sli4_resume_rpi */ if (pmb->u.mb.mbxCommand == MBX_RESUME_RPI) { - ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + ndlp = pmb->ctx_ndlp; lpfc_nlp_put(ndlp); } @@ -5819,7 +5819,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba) goto out_free_mboxq; } - mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; + mp = mboxq->ctx_buf; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, @@ -6849,9 +6849,9 @@ lpfc_ras_stop_fwlog(struct lpfc_hba *phba) { struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog; - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = INACTIVE; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); /* Disable FW logging to host memory */ writel(LPFC_CTL_PDEV_CTL_DDL_RAS, @@ -6894,9 +6894,9 @@ lpfc_sli4_ras_dma_free(struct lpfc_hba *phba) ras_fwlog->lwpd.virt = NULL; } - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = INACTIVE; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); } /** @@ -6998,9 +6998,9 @@ lpfc_sli4_ras_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) goto disable_ras; } - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = ACTIVE; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); mempool_free(pmb, phba->mbox_mem_pool); return; @@ -7032,9 +7032,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, uint32_t len = 0, fwlog_buffsize, fwlog_entry_count; int rc = 0; - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = INACTIVE; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); fwlog_buffsize = (LPFC_RAS_MIN_BUFF_POST_SIZE * phba->cfg_ras_fwlog_buffsize); @@ -7095,9 +7095,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, mbx_fwlog->u.request.lwpd.addr_lo = putPaddrLow(ras_fwlog->lwpd.phys); mbx_fwlog->u.request.lwpd.addr_hi = putPaddrHigh(ras_fwlog->lwpd.phys); - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = REG_INPROGRESS; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_sli4_ras_mbox_cmpl; @@ -8766,7 +8766,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) mboxq->vport = vport; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; + mp = mboxq->ctx_buf; if (rc == MBX_SUCCESS) { memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm)); rc = 0; @@ -9548,8 +9548,8 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, } /* Copy the mailbox extension data */ - if (pmbox->in_ext_byte_len && pmbox->ctx_buf) { - lpfc_sli_pcimem_bcopy(pmbox->ctx_buf, + if (pmbox->in_ext_byte_len && pmbox->ext_buf) { + lpfc_sli_pcimem_bcopy(pmbox->ext_buf, (uint8_t *)phba->mbox_ext, pmbox->in_ext_byte_len); } @@ -9562,10 +9562,10 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, = MAILBOX_HBA_EXT_OFFSET; /* Copy the mailbox extension data */ - if (pmbox->in_ext_byte_len && pmbox->ctx_buf) + if (pmbox->in_ext_byte_len && pmbox->ext_buf) lpfc_memcpy_to_slim(phba->MBslimaddr + MAILBOX_HBA_EXT_OFFSET, - pmbox->ctx_buf, pmbox->in_ext_byte_len); + pmbox->ext_buf, pmbox->in_ext_byte_len); if (mbx->mbxCommand == MBX_CONFIG_PORT) /* copy command data into host mbox for cmpl */ @@ -9688,9 +9688,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, lpfc_sli_pcimem_bcopy(phba->mbox, mbx, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ - if (pmbox->out_ext_byte_len && pmbox->ctx_buf) { + if (pmbox->out_ext_byte_len && pmbox->ext_buf) { lpfc_sli_pcimem_bcopy(phba->mbox_ext, - pmbox->ctx_buf, + pmbox->ext_buf, pmbox->out_ext_byte_len); } } else { @@ -9698,9 +9698,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, lpfc_memcpy_from_slim(mbx, phba->MBslimaddr, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ - if (pmbox->out_ext_byte_len && pmbox->ctx_buf) { + if (pmbox->out_ext_byte_len && pmbox->ext_buf) { lpfc_memcpy_from_slim( - pmbox->ctx_buf, + pmbox->ext_buf, phba->MBslimaddr + MAILBOX_HBA_EXT_OFFSET, pmbox->out_ext_byte_len); @@ -11373,18 +11373,18 @@ lpfc_sli_post_recovery_event(struct lpfc_hba *phba, unsigned long iflags; struct lpfc_work_evt *evtp = &ndlp->recovery_evt; + /* Hold a node reference for outstanding queued work */ + if (!lpfc_nlp_get(ndlp)) + return; + spin_lock_irqsave(&phba->hbalock, iflags); if (!list_empty(&evtp->evt_listp)) { spin_unlock_irqrestore(&phba->hbalock, iflags); + lpfc_nlp_put(ndlp); return; } - /* Incrementing the reference count until the queued work is done. */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); - if (!evtp->evt_arg1) { - spin_unlock_irqrestore(&phba->hbalock, iflags); - return; - } + evtp->evt_arg1 = ndlp; evtp->evt = LPFC_EVT_RECOVER_PORT; list_add_tail(&evtp->evt_listp, &phba->work_list); spin_unlock_irqrestore(&phba->hbalock, iflags); @@ -13262,9 +13262,9 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq, /* setup wake call as IOCB callback */ pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait; - /* setup context3 field to pass wait_queue pointer to wake function */ + /* setup ctx_u field to pass wait_queue pointer to wake function */ init_completion(&mbox_done); - pmboxq->context3 = &mbox_done; + pmboxq->ctx_u.mbox_wait = &mbox_done; /* now issue the command */ retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT); if (retval == MBX_BUSY || retval == MBX_SUCCESS) { @@ -13272,7 +13272,7 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq, msecs_to_jiffies(timeout * 1000)); spin_lock_irqsave(&phba->hbalock, flag); - pmboxq->context3 = NULL; + pmboxq->ctx_u.mbox_wait = NULL; /* * if LPFC_MBX_WAKE flag is set the mailbox is completed * else do not free the resources. @@ -13813,10 +13813,10 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id) lpfc_sli_pcimem_bcopy(mbox, pmbox, MAILBOX_CMD_SIZE); if (pmb->out_ext_byte_len && - pmb->ctx_buf) + pmb->ext_buf) lpfc_sli_pcimem_bcopy( phba->mbox_ext, - pmb->ctx_buf, + pmb->ext_buf, pmb->out_ext_byte_len); } if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) { @@ -13830,10 +13830,8 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id) pmbox->un.varWords[0], 0); if (!pmbox->mbxStatus) { - mp = (struct lpfc_dmabuf *) - (pmb->ctx_buf); - ndlp = (struct lpfc_nodelist *) - pmb->ctx_ndlp; + mp = pmb->ctx_buf; + ndlp = pmb->ctx_ndlp; /* Reg_LOGIN of dflt RPI was * successful. new lets get @@ -14340,8 +14338,8 @@ lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe) mcqe_status, pmbox->un.varWords[0], 0); if (mcqe_status == MB_CQE_STATUS_SUCCESS) { - mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); - ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + mp = pmb->ctx_buf; + ndlp = pmb->ctx_ndlp; /* Reg_LOGIN of dflt RPI was successful. Mark the * node as having an UNREG_LOGIN in progress to stop @@ -19823,14 +19821,15 @@ lpfc_sli4_remove_rpis(struct lpfc_hba *phba) * lpfc_sli4_resume_rpi - Remove the rpi bitmask region * @ndlp: pointer to lpfc nodelist data structure. * @cmpl: completion call-back. - * @arg: data to load as MBox 'caller buffer information' + * @iocbq: data to load as mbox ctx_u information * * This routine is invoked to remove the memory region that * provided rpi via a bitmask. **/ int lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp, - void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *arg) + void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), + struct lpfc_iocbq *iocbq) { LPFC_MBOXQ_t *mboxq; struct lpfc_hba *phba = ndlp->phba; @@ -19859,7 +19858,7 @@ lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp, lpfc_resume_rpi(mboxq, ndlp); if (cmpl) { mboxq->mbox_cmpl = cmpl; - mboxq->ctx_buf = arg; + mboxq->ctx_u.save_iocb = iocbq; } else mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; mboxq->ctx_ndlp = ndlp; @@ -20676,7 +20675,7 @@ lpfc_sli4_get_config_region23(struct lpfc_hba *phba, char *rgn23_data) if (lpfc_sli4_dump_cfg_rg23(phba, mboxq)) goto out; mqe = &mboxq->u.mqe; - mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; + mp = mboxq->ctx_buf; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); if (rc) goto out; @@ -21035,7 +21034,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) (mb->u.mb.mbxCommand == MBX_REG_VPI)) mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - act_mbx_ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; + act_mbx_ndlp = mb->ctx_ndlp; /* This reference is local to this routine. The * reference is removed at routine exit. @@ -21064,7 +21063,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; + ndlp = mb->ctx_ndlp; /* Unregister the RPI when mailbox complete */ mb->mbox_flag |= LPFC_MBX_IMED_UNREG; restart_loop = 1; @@ -21084,7 +21083,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) while (!list_empty(&mbox_cmd_list)) { list_remove_head(&mbox_cmd_list, mb, LPFC_MBOXQ_t, list); if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; + ndlp = mb->ctx_ndlp; mb->ctx_ndlp = NULL; if (ndlp) { spin_lock(&ndlp->lock); diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index c911a39cb46b..cf7c42ec0306 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term * * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * @@ -182,11 +182,29 @@ typedef struct lpfcMboxq { struct lpfc_mqe mqe; } u; struct lpfc_vport *vport; /* virtual port pointer */ - void *ctx_ndlp; /* an lpfc_nodelist pointer */ - void *ctx_buf; /* an lpfc_dmabuf pointer */ - void *context3; /* a generic pointer. Code must - * accommodate the actual datatype. - */ + struct lpfc_nodelist *ctx_ndlp; /* caller ndlp pointer */ + struct lpfc_dmabuf *ctx_buf; /* caller buffer information */ + void *ext_buf; /* extended buffer for extended mbox + * cmds. Not a generic pointer. + * Use for storing virtual address. + */ + + /* Pointers that are seldom used during mbox execution, but require + * a saved context. + */ + union { + unsigned long ox_rx_id; /* Used in els_rsp_rls_acc */ + struct lpfc_rdp_context *rdp; /* Used in get_rdp_info */ + struct lpfc_lcb_context *lcb; /* Used in set_beacon */ + struct completion *mbox_wait; /* Used in issue_mbox_wait */ + struct bsg_job_data *dd_data; /* Used in bsg_issue_mbox_cmpl + * and + * bsg_issue_mbox_ext_handle_job + */ + struct lpfc_iocbq *save_iocb; /* Used in defer_plogi_acc and + * lpfc_mbx_cmpl_resume_rpi + */ + } ctx_u; void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *); uint8_t mbox_flag; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 2541a8fba093..c1e9ec0243ba 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term * * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2009-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * @@ -1118,8 +1118,9 @@ void lpfc_sli4_free_rpi(struct lpfc_hba *, int); void lpfc_sli4_remove_rpis(struct lpfc_hba *); void lpfc_sli4_async_event_proc(struct lpfc_hba *); void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *); -int lpfc_sli4_resume_rpi(struct lpfc_nodelist *, - void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *); +int lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp, + void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), + struct lpfc_iocbq *iocbq); void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba); void lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd); diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 56f5889dbaf9..915f2f11fb55 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "14.4.0.0" +#define LPFC_DRIVER_VERSION "14.4.0.1" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 0f79840b9498..4439167a5188 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -166,7 +166,7 @@ lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport) } } - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm)); memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName, sizeof (struct lpfc_name)); @@ -674,10 +674,6 @@ lpfc_vport_delete(struct fc_vport *fc_vport) lpfc_free_sysfs_attr(vport); lpfc_debugfs_terminate(vport); - /* Remove FC host to break driver binding. */ - fc_remove_host(shost); - scsi_remove_host(shost); - /* Send the DA_ID and Fabric LOGO to cleanup Nameserver entries. */ ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) @@ -721,6 +717,10 @@ lpfc_vport_delete(struct fc_vport *fc_vport) skip_logo: + /* Remove FC host to break driver binding. */ + fc_remove_host(shost); + scsi_remove_host(shost); + lpfc_cleanup(vport); /* Remove scsi host now. The nodes are cleaned up. */ diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 0380996b5ad2..55d590b91947 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -1644,7 +1644,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) if ((mpirep_offset != 0xFF) && drv_bufs[mpirep_offset].bsg_buf_len) { drv_buf_iter = &drv_bufs[mpirep_offset]; - drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) - 1 + + drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) + mrioc->reply_sz); bsg_reply_buf = kzalloc(drv_buf_iter->kern_buf_len, GFP_KERNEL); diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index ca2e932dd9b7..f684eb5e0489 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrb_devstate_name(ldev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); } else { struct myrb_pdev_state *pdev_info = sdev->hostdata; @@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev, else name = myrb_devstate_name(pdev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->state); } return ret; @@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev, name = myrb_raidlevel_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } - return snprintf(buf, 32, "Physical Drive\n"); + return snprintf(buf, 64, "Physical Drive\n"); } static DEVICE_ATTR_RO(raid_level); @@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < myrb_logical_channel(sdev->host)) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); status = myrb_get_rbld_progress(cb, &rbld_buf); if (rbld_buf.ldev_num != sdev->id || status != MYRB_STATUS_SUCCESS) - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); - return snprintf(buf, 32, "rebuilding block %u of %u\n", + return snprintf(buf, 64, "rebuilding block %u of %u\n", rbld_buf.ldev_size - rbld_buf.blocks_left, rbld_buf.ldev_size); } diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index a1eec65a9713..e824be9d9bbb 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrs_devstate_name(ldev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else { struct myrs_pdev_info *pdev_info; @@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev, pdev_info = sdev->hostdata; name = myrs_devstate_name(pdev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->dev_state); } return ret; @@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev, ldev_info = sdev->hostdata; name = myrs_raid_level_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else name = myrs_raid_level_name(MYRS_RAID_PHYSICAL); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } static DEVICE_ATTR_RO(raid_level); @@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); ldev_info = sdev->hostdata; ldev_num = ldev_info->ldev_num; @@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev, return -EIO; } if (ldev_info->rbld_active) { - return snprintf(buf, 32, "rebuilding block %zu of %zu\n", + return snprintf(buf, 64, "rebuilding block %zu of %zu\n", (size_t)ldev_info->rbld_lba, (size_t)ldev_info->cfg_devsize); } else - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); } static ssize_t rebuild_store(struct device *dev, @@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev, unsigned short ldev_num; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not checking\n"); + return snprintf(buf, 64, "physical device - not checking\n"); ldev_info = sdev->hostdata; if (!ldev_info) @@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev, ldev_num = ldev_info->ldev_num; myrs_get_ldev_info(cs, ldev_num, ldev_info); if (ldev_info->cc_active) - return snprintf(buf, 32, "checking block %zu of %zu\n", + return snprintf(buf, 64, "checking block %zu of %zu\n", (size_t)ldev_info->cc_lba, (size_t)ldev_info->cfg_devsize); else - return snprintf(buf, 32, "not checking\n"); + return snprintf(buf, 64, "not checking\n"); } static ssize_t consistency_check_store(struct device *dev, diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index e8bcc3a88732..0614b7e366b7 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -61,7 +61,9 @@ static atomic_t pmcraid_adapter_count = ATOMIC_INIT(0); * pmcraid_minor - minor number(s) to use */ static unsigned int pmcraid_major; -static struct class *pmcraid_class; +static const struct class pmcraid_class = { + .name = PMCRAID_DEVFILE, +}; static DECLARE_BITMAP(pmcraid_minor, PMCRAID_MAX_ADAPTERS); /* @@ -4723,7 +4725,7 @@ static int pmcraid_setup_chrdev(struct pmcraid_instance *pinstance) if (error) pmcraid_release_minor(minor); else - device_create(pmcraid_class, NULL, MKDEV(pmcraid_major, minor), + device_create(&pmcraid_class, NULL, MKDEV(pmcraid_major, minor), NULL, "%s%u", PMCRAID_DEVFILE, minor); return error; } @@ -4739,7 +4741,7 @@ static int pmcraid_setup_chrdev(struct pmcraid_instance *pinstance) static void pmcraid_release_chrdev(struct pmcraid_instance *pinstance) { pmcraid_release_minor(MINOR(pinstance->cdev.dev)); - device_destroy(pmcraid_class, + device_destroy(&pmcraid_class, MKDEV(pmcraid_major, MINOR(pinstance->cdev.dev))); cdev_del(&pinstance->cdev); } @@ -5390,10 +5392,10 @@ static int __init pmcraid_init(void) } pmcraid_major = MAJOR(dev); - pmcraid_class = class_create(PMCRAID_DEVFILE); - if (IS_ERR(pmcraid_class)) { - error = PTR_ERR(pmcraid_class); + error = class_register(&pmcraid_class); + + if (error) { pmcraid_err("failed to register with sysfs, error = %x\n", error); goto out_unreg_chrdev; @@ -5402,7 +5404,7 @@ static int __init pmcraid_init(void) error = pmcraid_netlink_init(); if (error) { - class_destroy(pmcraid_class); + class_unregister(&pmcraid_class); goto out_unreg_chrdev; } @@ -5413,7 +5415,7 @@ static int __init pmcraid_init(void) pmcraid_err("failed to register pmcraid driver, error = %x\n", error); - class_destroy(pmcraid_class); + class_unregister(&pmcraid_class); pmcraid_netlink_release(); out_unreg_chrdev: @@ -5432,7 +5434,7 @@ static void __exit pmcraid_exit(void) unregister_chrdev_region(MKDEV(pmcraid_major, 0), PMCRAID_MAX_ADAPTERS); pci_unregister_driver(&pmcraid_driver); - class_destroy(pmcraid_class); + class_unregister(&pmcraid_class); } module_init(pmcraid_init); diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 44449c70a375..76eeba435fd0 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2741,7 +2741,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport) return; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900c, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(fcport->vha); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); return; } } @@ -2763,7 +2769,11 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) vha = fcport->vha; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900b, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(vha); qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET); return; diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index deb642607deb..2f49baf131e2 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -82,7 +82,7 @@ typedef union { #include "qla_nvme.h" #define QLA2XXX_DRIVER_NAME "qla2xxx" #define QLA2XXX_APIDEV "ql2xapidev" -#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc." +#define QLA2XXX_MANUFACTURER "Marvell" /* * We have MAILBOX_REGISTER_COUNT sized arrays in a few places, diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index 26e6b3e3af43..dcde55c8ee5d 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -1100,7 +1100,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job) list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) { if (fcport->edif.enable) { - if (pcnt > app_req.num_ports) + if (pcnt >= app_req.num_ports) break; app_reply->elem[pcnt].rekey_count = diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 09cb9413670a..7309310d2ab9 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -44,7 +44,7 @@ extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *); extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *); extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t); -extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool); +extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *); extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha, struct els_plogi *els_plogi); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index a314cfc5b263..8377624d76c9 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1193,8 +1193,12 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - /* ref: INIT */ - kref_put(&sp->cmd_kref, qla2x00_sp_release); + /* + * use qla24xx_async_gnl_sp_done to purge all pending gnl request. + * kref_put is call behind the scene. + */ + sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR; + qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR); fcport->flags &= ~(FCF_ASYNC_SENT); done: fcport->flags &= ~(FCF_ASYNC_ACTIVE); @@ -2665,6 +2669,40 @@ exit: return rval; } +static void qla_enable_fce_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->fce) { + ha->flags.fce_enabled = 1; + memset(ha->fce, 0, fce_calc_size(ha->fce_bufs)); + rval = qla2x00_enable_fce_trace(vha, + ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8033, + "Unable to reinitialize FCE (%d).\n", rval); + ha->flags.fce_enabled = 0; + } + } +} + +static void qla_enable_eft_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->eft) { + memset(ha->eft, 0, EFT_SIZE); + rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8034, + "Unable to reinitialize EFT (%d).\n", rval); + } + } +} /* * qla2x00_initialize_adapter * Initialize board. @@ -3668,9 +3706,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha) } static void -qla2x00_init_fce_trace(scsi_qla_host_t *vha) +qla2x00_alloc_fce_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3699,27 +3736,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS, - ha->fce_mb, &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x00bf, - "Unable to initialize FCE (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c0, "Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024); - ha->flags.fce_enabled = 1; ha->fce_dma = tc_dma; ha->fce = tc; + ha->fce_bufs = FCE_NUM_BUFFERS; } static void -qla2x00_init_eft_trace(scsi_qla_host_t *vha) +qla2x00_alloc_eft_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3744,14 +3771,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x00c2, - "Unable to initialize EFT (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c3, "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024); @@ -3759,13 +3778,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) ha->eft = tc; } -static void -qla2x00_alloc_offload_mem(scsi_qla_host_t *vha) -{ - qla2x00_init_fce_trace(vha); - qla2x00_init_eft_trace(vha); -} - void qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) { @@ -3820,10 +3832,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) if (ha->tgt.atio_ring) mq_size += ha->tgt.atio_q_length * sizeof(request_t); - qla2x00_init_fce_trace(vha); + qla2x00_alloc_fce_trace(vha); if (ha->fce) fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE; - qla2x00_init_eft_trace(vha); + qla2x00_alloc_eft_trace(vha); if (ha->eft) eft_size = EFT_SIZE; } @@ -4253,7 +4265,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; unsigned long flags; - uint16_t fw_major_version; int done_once = 0; if (IS_P3P_TYPE(ha)) { @@ -4320,7 +4331,6 @@ execute_fw_with_lr: goto failed; enable_82xx_npiv: - fw_major_version = ha->fw_major_version; if (IS_P3P_TYPE(ha)) qla82xx_check_md_needed(vha); else @@ -4349,12 +4359,11 @@ enable_82xx_npiv: if (rval != QLA_SUCCESS) goto failed; - if (!fw_major_version && !(IS_P3P_TYPE(ha))) - qla2x00_alloc_offload_mem(vha); - if (ql2xallocfwdump && !(IS_P3P_TYPE(ha))) qla2x00_alloc_fw_dump(vha); + qla_enable_fce_trace(vha); + qla_enable_eft_trace(vha); } else { goto failed; } @@ -7487,12 +7496,12 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) int qla2x00_abort_isp(scsi_qla_host_t *vha) { - int rval; uint8_t status = 0; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp, *tvp; struct req_que *req = ha->req_q_map[0]; unsigned long flags; + fc_port_t *fcport; if (vha->flags.online) { qla2x00_abort_isp_cleanup(vha); @@ -7561,6 +7570,15 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) "ISP Abort - ISP reg disconnect post nvmram config, exiting.\n"); return status; } + + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + if (!qla2x00_restart_isp(vha)) { clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags); @@ -7581,31 +7599,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) if (IS_QLA81XX(ha) || IS_QLA8031(ha)) qla2x00_get_fw_version(vha); - if (ha->fce) { - ha->flags.fce_enabled = 1; - memset(ha->fce, 0, - fce_calc_size(ha->fce_bufs)); - rval = qla2x00_enable_fce_trace(vha, - ha->fce_dma, ha->fce_bufs, ha->fce_mb, - &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x8033, - "Unable to reinitialize FCE " - "(%d).\n", rval); - ha->flags.fce_enabled = 0; - } - } - if (ha->eft) { - memset(ha->eft, 0, EFT_SIZE); - rval = qla2x00_enable_eft_trace(vha, - ha->eft_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x8034, - "Unable to reinitialize EFT " - "(%d).\n", rval); - } - } } else { /* failed the ISP abort */ vha->flags.online = 1; if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { @@ -7655,6 +7649,14 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) atomic_inc(&vp->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vp->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + qla2x00_vp_abort_isp(vp); spin_lock_irqsave(&ha->vport_slock, flags); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index df90169f8244..0b41e8a06602 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2587,6 +2587,33 @@ void qla2x00_sp_release(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); + struct scsi_qla_host *vha = sp->vha; + + switch (sp->type) { + case SRB_CT_PTHRU_CMD: + /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */ + if (sp->u.iocb_cmd.u.ctarg.req && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + break; + default: + break; + } sp->free(sp); } @@ -2610,7 +2637,8 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp) { struct srb_iocb *elsio = &sp->u.iocb_cmd; - kfree(sp->fcport); + if (sp->fcport) + qla2x00_free_fcport(sp->fcport); if (elsio->u.els_logo.els_logo_pyld) dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE, @@ -2692,7 +2720,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { - kfree(fcport); + qla2x00_free_fcport(fcport); ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); return -ENOMEM; @@ -2723,6 +2751,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_logo.els_logo_pyld) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2747,6 +2776,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (rval != QLA_SUCCESS) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -3012,7 +3042,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, - fc_port_t *fcport, bool wait) + fc_port_t *fcport) { srb_t *sp; struct srb_iocb *elsio = NULL; @@ -3027,8 +3057,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!sp) { ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); - fcport->flags &= ~FCF_ASYNC_ACTIVE; - return -ENOMEM; + goto done; } fcport->flags |= FCF_ASYNC_SENT; @@ -3037,9 +3066,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_io, vha, 0x3073, "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24); - if (wait) - sp->flags = SRB_WAKEUP_ON_COMP; - sp->type = SRB_ELS_DCMD; sp->name = "ELS_DCMD"; sp->fcport = fcport; @@ -3055,7 +3081,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_plogi_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } resp_ptr = elsio->u.els_plogi.els_resp_pyld = @@ -3064,7 +3090,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_resp_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr); @@ -3080,7 +3106,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) { struct fc_els_flogi *p = ptr; - p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC); } @@ -3089,10 +3114,11 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, sizeof(*elsio->u.els_plogi.els_plogi_pyld)); - init_completion(&elsio->u.els_plogi.comp); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_LOGIN_NEEDED; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + goto done_free_sp; } else { ql_dbg(ql_dbg_disc, vha, 0x3074, "%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n", @@ -3100,21 +3126,15 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b24, vha->d_id.b24); } - if (wait) { - wait_for_completion(&elsio->u.els_plogi.comp); + return rval; - if (elsio->u.els_plogi.comp_status != CS_COMPLETE) - rval = QLA_FUNCTION_FAILED; - } else { - goto done; - } - -out: - fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); +done_free_sp: qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); done: + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); + qla2x00_set_fcport_disc_state(fcport, DSC_DELETED); return rval; } @@ -3918,7 +3938,7 @@ qla2x00_start_sp(srb_t *sp) return -EAGAIN; } - pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); + pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp); if (!pkt) { rval = -EAGAIN; ql_log(ql_log_warn, vha, 0x700c, diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 21ec32b4fb28..0cd6f3e14882 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -194,7 +194,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || ha->flags.eeh_busy) { ql_log(ql_log_warn, vha, 0xd035, - "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", + "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); rval = QLA_ABORTED; goto premature_exit; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index dd674378f2f3..1e2f52210f60 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4602,6 +4602,7 @@ fail_free_init_cb: ha->init_cb_dma = 0; fail_free_vp_map: kfree(ha->vp_map); + ha->vp_map = NULL; fail: ql_log(ql_log_fatal, NULL, 0x0030, "Memory allocation failure.\n"); @@ -5583,7 +5584,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) break; case QLA_EVT_ELS_PLOGI: qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI, - e->u.fcport.fcport, false); + e->u.fcport.fcport); break; case QLA_EVT_SA_REPLACE: rc = qla24xx_issue_sa_replace_iocb(vha, e); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2ef2dbac0db2..d7551b1443e4 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1062,6 +1062,16 @@ void qlt_free_session_done(struct work_struct *work) "%s: sess %p logout completed\n", __func__, sess); } + /* check for any straggling io left behind */ + if (!(sess->flags & FCF_FCP2_DEVICE) && + qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) { + ql_log(ql_log_warn, vha, 0x3027, + "IO not return. Resetting.\n"); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_chip_reset(vha); + } + if (sess->logo_ack_needed) { sess->logo_ack_needed = 0; qla24xx_async_notify_ack(vha, sess, diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index d903563e969e..7627fd807bc3 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -6,9 +6,9 @@ /* * Driver version */ -#define QLA2XXX_VERSION "10.02.09.100-k" +#define QLA2XXX_VERSION "10.02.09.200-k" #define QLA_DRIVER_MAJOR_VER 10 #define QLA_DRIVER_MINOR_VER 2 #define QLA_DRIVER_PATCH_VER 9 -#define QLA_DRIVER_BETA_VER 100 +#define QLA_DRIVER_BETA_VER 200 diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2e28e2360c85..5b3230ef51fe 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -635,10 +635,9 @@ static bool scsi_end_request(struct request *req, blk_status_t error, if (blk_queue_add_random(q)) add_disk_randomness(req->q->disk); - if (!blk_rq_is_passthrough(req)) { - WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED)); - cmd->flags &= ~SCMD_INITIALIZED; - } + WARN_ON_ONCE(!blk_rq_is_passthrough(req) && + !(cmd->flags & SCMD_INITIALIZED)); + cmd->flags = 0; /* * Calling rcu_barrier() is not necessary here because the diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 8d06475de17a..ffd7e7e72933 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); +int scsi_resume_device(struct scsi_device *sdev) +{ + struct device *dev = &sdev->sdev_gendev; + int ret = 0; + + device_lock(dev); + + /* + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { + ret = -EWOULDBLOCK; + goto unlock; + } + + if (dev->driver && try_module_get(dev->driver->owner)) { + struct scsi_driver *drv = to_scsi_driver(dev->driver); + + if (drv->resume) + ret = drv->resume(dev); + module_put(dev->driver->owner); + } + +unlock: + device_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(scsi_resume_device); + int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ccff8f2e2e75..58fdf679341d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3920,7 +3920,7 @@ static int sd_probe(struct device *dev) error = device_add_disk(dev, gd, NULL); if (error) { - put_device(&sdkp->disk_dev); + device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } @@ -4108,7 +4108,21 @@ static int sd_suspend_runtime(struct device *dev) return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev, bool runtime) +static int sd_resume(struct device *dev) +{ + struct scsi_disk *sdkp = dev_get_drvdata(dev); + + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + + if (opal_unlock_from_suspend(sdkp->opal_dev)) { + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); + return -EIO; + } + + return 0; +} + +static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret; @@ -4124,7 +4138,7 @@ static int sd_resume(struct device *dev, bool runtime) sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { - opal_unlock_from_suspend(sdkp->opal_dev); + sd_resume(dev); sdkp->suspended = false; } @@ -4143,7 +4157,7 @@ static int sd_resume_system(struct device *dev) return 0; } - return sd_resume(dev, false); + return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -4170,7 +4184,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to clear sense data\n"); } - return sd_resume(dev, true); + return sd_resume_common(dev, true); } static const struct dev_pm_ops sd_pm_ops = { @@ -4193,6 +4207,7 @@ static struct scsi_driver sd_template = { .pm = &sd_pm_ops, }, .rescan = sd_rescan, + .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 86210e4dd0d3..baf870a03ecf 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -285,6 +285,7 @@ sg_open(struct inode *inode, struct file *filp) int dev = iminor(inode); int flags = filp->f_flags; struct request_queue *q; + struct scsi_device *device; Sg_device *sdp; Sg_fd *sfp; int retval; @@ -301,11 +302,12 @@ sg_open(struct inode *inode, struct file *filp) /* This driver's module count bumped by fops_get in */ /* Prevent the device driver from vanishing while we sleep */ - retval = scsi_device_get(sdp->device); + device = sdp->device; + retval = scsi_device_get(device); if (retval) goto sg_put; - retval = scsi_autopm_get_device(sdp->device); + retval = scsi_autopm_get_device(device); if (retval) goto sdp_put; @@ -313,7 +315,7 @@ sg_open(struct inode *inode, struct file *filp) * check if O_NONBLOCK. Permits SCSI commands to be issued * during error recovery. Tread carefully. */ if (!((flags & O_NONBLOCK) || - scsi_block_when_processing_errors(sdp->device))) { + scsi_block_when_processing_errors(device))) { retval = -ENXIO; /* we are in error recovery for this device */ goto error_out; @@ -344,7 +346,7 @@ sg_open(struct inode *inode, struct file *filp) if (sdp->open_cnt < 1) { /* no existing opens */ sdp->sgdebug = 0; - q = sdp->device->request_queue; + q = device->request_queue; sdp->sg_tablesize = queue_max_segments(q); } sfp = sg_add_sfp(sdp); @@ -370,10 +372,11 @@ out_undo: error_mutex_locked: mutex_unlock(&sdp->open_rel_lock); error_out: - scsi_autopm_put_device(sdp->device); + scsi_autopm_put_device(device); sdp_put: - scsi_device_put(sdp->device); - goto sg_put; + kref_put(&sdp->d_ref, sg_device_destroy); + scsi_device_put(device); + return retval; } /* Release resources associated with a successful sg_open() @@ -1424,7 +1427,9 @@ static const struct file_operations sg_fops = { .llseek = no_llseek, }; -static struct class *sg_sysfs_class; +static const struct class sg_sysfs_class = { + .name = "scsi_generic" +}; static int sg_sysfs_valid = 0; @@ -1526,7 +1531,7 @@ sg_add_device(struct device *cl_dev) if (sg_sysfs_valid) { struct device *sg_class_member; - sg_class_member = device_create(sg_sysfs_class, cl_dev->parent, + sg_class_member = device_create(&sg_sysfs_class, cl_dev->parent, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), sdp, "%s", sdp->name); @@ -1616,7 +1621,7 @@ sg_remove_device(struct device *cl_dev) read_unlock_irqrestore(&sdp->sfd_lock, iflags); sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic"); - device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index)); + device_destroy(&sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index)); cdev_del(sdp->cdev); sdp->cdev = NULL; @@ -1687,11 +1692,9 @@ init_sg(void) SG_MAX_DEVS, "sg"); if (rc) return rc; - sg_sysfs_class = class_create("scsi_generic"); - if ( IS_ERR(sg_sysfs_class) ) { - rc = PTR_ERR(sg_sysfs_class); + rc = class_register(&sg_sysfs_class); + if (rc) goto err_out; - } sg_sysfs_valid = 1; rc = scsi_register_interface(&sg_interface); if (0 == rc) { @@ -1700,7 +1703,7 @@ init_sg(void) #endif /* CONFIG_SCSI_PROC_FS */ return 0; } - class_destroy(sg_sysfs_class); + class_unregister(&sg_sysfs_class); register_sg_sysctls(); err_out: unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); @@ -1715,7 +1718,7 @@ exit_sg(void) remove_proc_subtree("scsi/sg", NULL); #endif /* CONFIG_SCSI_PROC_FS */ scsi_unregister_interface(&sg_interface); - class_destroy(sg_sysfs_class); + class_unregister(&sg_sysfs_class); sg_sysfs_valid = 0; unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); @@ -2207,6 +2210,7 @@ sg_remove_sfp_usercontext(struct work_struct *work) { struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work); struct sg_device *sdp = sfp->parentdp; + struct scsi_device *device = sdp->device; Sg_request *srp; unsigned long iflags; @@ -2232,8 +2236,8 @@ sg_remove_sfp_usercontext(struct work_struct *work) "sg_remove_sfp: sfp=0x%p\n", sfp)); kfree(sfp); - scsi_device_put(sdp->device); kref_put(&sdp->d_ref, sg_device_destroy); + scsi_device_put(device); module_put(THIS_MODULE); } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 338aa8c42968..5a9bcf8e0792 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -87,7 +87,7 @@ static int try_rdio = 1; static int try_wdio = 1; static int debug_flag; -static struct class st_sysfs_class; +static const struct class st_sysfs_class; static const struct attribute_group *st_dev_groups[]; static const struct attribute_group *st_drv_groups[]; @@ -4438,7 +4438,7 @@ static void scsi_tape_release(struct kref *kref) return; } -static struct class st_sysfs_class = { +static const struct class st_sysfs_class = { .name = "scsi_tape", .dev_groups = st_dev_groups, }; diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 079035db7dd8..92a662d1b55c 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -852,39 +852,39 @@ static int fsl_lpspi_probe(struct platform_device *pdev) fsl_lpspi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(fsl_lpspi->base)) { ret = PTR_ERR(fsl_lpspi->base); - goto out_controller_put; + return ret; } fsl_lpspi->base_phys = res->start; irq = platform_get_irq(pdev, 0); if (irq < 0) { ret = irq; - goto out_controller_put; + return ret; } ret = devm_request_irq(&pdev->dev, irq, fsl_lpspi_isr, 0, dev_name(&pdev->dev), fsl_lpspi); if (ret) { dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret); - goto out_controller_put; + return ret; } fsl_lpspi->clk_per = devm_clk_get(&pdev->dev, "per"); if (IS_ERR(fsl_lpspi->clk_per)) { ret = PTR_ERR(fsl_lpspi->clk_per); - goto out_controller_put; + return ret; } fsl_lpspi->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); if (IS_ERR(fsl_lpspi->clk_ipg)) { ret = PTR_ERR(fsl_lpspi->clk_ipg); - goto out_controller_put; + return ret; } /* enable the clock */ ret = fsl_lpspi_init_rpm(fsl_lpspi); if (ret) - goto out_controller_put; + return ret; ret = pm_runtime_get_sync(fsl_lpspi->dev); if (ret < 0) { @@ -945,8 +945,6 @@ out_pm_get: pm_runtime_dont_use_autosuspend(fsl_lpspi->dev); pm_runtime_put_sync(fsl_lpspi->dev); pm_runtime_disable(fsl_lpspi->dev); -out_controller_put: - spi_controller_put(controller); return ret; } diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c index 969965d7bc98..cc18d320370f 100644 --- a/drivers/spi/spi-pci1xxxx.c +++ b/drivers/spi/spi-pci1xxxx.c @@ -725,6 +725,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev, sizeof(struct pci1xxxx_spi_internal), GFP_KERNEL); + if (!spi_bus->spi_int[iter]) + return -ENOMEM; spi_sub_ptr = spi_bus->spi_int[iter]; spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller)); if (!spi_sub_ptr->spi_host) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 9fcbe040cb2f..f726d8670428 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -430,7 +430,7 @@ static bool s3c64xx_spi_can_dma(struct spi_controller *host, struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host); if (sdd->rx_dma.ch && sdd->tx_dma.ch) - return xfer->len > sdd->fifo_depth; + return xfer->len >= sdd->fifo_depth; return false; } @@ -826,10 +826,9 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host, return status; } - if (!is_polling(sdd) && (xfer->len > fifo_len) && + if (!is_polling(sdd) && xfer->len >= fifo_len && sdd->rx_dma.ch && sdd->tx_dma.ch) { use_dma = 1; - } else if (xfer->len >= fifo_len) { tx_buf = xfer->tx_buf; rx_buf = xfer->rx_buf; diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index 258aa0e37f55..4c3684dd902e 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -937,8 +937,9 @@ static int create_component(struct vchiq_mmal_instance *instance, /* build component create message */ m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE; m.u.component_create.client_component = component->client_component; - strncpy(m.u.component_create.name, name, - sizeof(m.u.component_create.name)); + strscpy_pad(m.u.component_create.name, name, + sizeof(m.u.component_create.name)); + m.u.component_create.pid = 0; ret = send_synchronous_mmal_msg(instance, &m, sizeof(m.u.component_create), diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 679720021183..d9a6242264b7 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -583,7 +583,7 @@ int iscsit_dataout_datapduinorder_no_fbit( struct iscsi_pdu *pdu) { int i, send_recovery_r2t = 0, recovery = 0; - u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0; + u32 length = 0, offset = 0, pdu_count = 0; struct iscsit_conn *conn = cmd->conn; struct iscsi_pdu *first_pdu = NULL; @@ -596,7 +596,6 @@ int iscsit_dataout_datapduinorder_no_fbit( if (cmd->pdu_list[i].seq_no == pdu->seq_no) { if (!first_pdu) first_pdu = &cmd->pdu_list[i]; - xfer_len += cmd->pdu_list[i].length; pdu_count++; } else if (pdu_count) break; diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index c1fbcdd16182..c40217f44b1b 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3672,6 +3672,8 @@ static int __init target_core_init_configfs(void) { struct configfs_subsystem *subsys = &target_core_fabrics; struct t10_alua_lu_gp *lu_gp; + struct cred *kern_cred; + const struct cred *old_cred; int ret; pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage" @@ -3748,11 +3750,21 @@ static int __init target_core_init_configfs(void) if (ret < 0) goto out; + /* We use the kernel credentials to access the target directory */ + kern_cred = prepare_kernel_cred(&init_task); + if (!kern_cred) { + ret = -ENOMEM; + goto out; + } + old_cred = override_creds(kern_cred); target_init_dbroot(); + revert_creds(old_cred); + put_cred(kern_cred); return 0; out: + target_xcopy_release_pt(); configfs_unregister_subsystem(subsys); core_dev_release_virtual_lun0(); rd_module_exit(); diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 50dec24e967a..8fd7cf1932cd 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -214,7 +214,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd res = dfc->power_ops->get_real_power(df, power, freq, voltage); if (!res) { - state = dfc->capped_state; + state = dfc->max_state - dfc->capped_state; /* Convert EM power into milli-Watts first */ rcu_read_lock(); diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 1b17dc4c219c..e25e48d76aa7 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -606,7 +606,7 @@ static int allocate_actors_buffer(struct power_allocator_params *params, /* There might be no cooling devices yet. */ if (!num_actors) { - ret = -EINVAL; + ret = 0; goto clean_state; } @@ -679,11 +679,6 @@ static int power_allocator_bind(struct thermal_zone_device *tz) return -ENOMEM; get_governor_trips(tz, params); - if (!params->trip_max) { - dev_warn(&tz->device, "power_allocator: missing trip_max\n"); - kfree(params); - return -EINVAL; - } ret = check_power_actors(tz, params); if (ret < 0) { @@ -714,9 +709,10 @@ static int power_allocator_bind(struct thermal_zone_device *tz) else params->sustainable_power = tz->tzp->sustainable_power; - estimate_pid_constants(tz, tz->tzp->sustainable_power, - params->trip_switch_on, - params->trip_max->temperature); + if (params->trip_max) + estimate_pid_constants(tz, tz->tzp->sustainable_power, + params->trip_switch_on, + params->trip_max->temperature); reset_pid_controller(params); diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index c617e8b9f0dd..d78d54ae2605 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -616,6 +616,7 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz, tze->trip_stats[trip_id].timestamp = now; tze->trip_stats[trip_id].max = max(tze->trip_stats[trip_id].max, temperature); tze->trip_stats[trip_id].min = min(tze->trip_stats[trip_id].min, temperature); + tze->trip_stats[trip_id].count++; tze->trip_stats[trip_id].avg = tze->trip_stats[trip_id].avg + (temperature - tze->trip_stats[trip_id].avg) / tze->trip_stats[trip_id].count; diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index 09f6050dd041..497abf0d47ca 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -65,7 +65,6 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz) { const struct thermal_trip *trip; int low = -INT_MAX, high = INT_MAX; - bool same_trip = false; int ret; lockdep_assert_held(&tz->lock); @@ -74,36 +73,22 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz) return; for_each_trip(tz, trip) { - bool low_set = false; int trip_low; trip_low = trip->temperature - trip->hysteresis; - if (trip_low < tz->temperature && trip_low > low) { + if (trip_low < tz->temperature && trip_low > low) low = trip_low; - low_set = true; - same_trip = false; - } if (trip->temperature > tz->temperature && - trip->temperature < high) { + trip->temperature < high) high = trip->temperature; - same_trip = low_set; - } } /* No need to change trip points */ if (tz->prev_low_trip == low && tz->prev_high_trip == high) return; - /* - * If "high" and "low" are the same, skip the change unless this is the - * first time. - */ - if (same_trip && (tz->prev_low_trip != -INT_MAX || - tz->prev_high_trip != INT_MAX)) - return; - tz->prev_low_trip = low; tz->prev_high_trip = high; diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 6ffc4e81ffed..326433df5880 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3180,22 +3180,29 @@ void tb_switch_unconfigure_link(struct tb_switch *sw) { struct tb_port *up, *down; - if (sw->is_unplugged) - return; if (!tb_route(sw) || tb_switch_is_icm(sw)) return; + /* + * Unconfigure downstream port so that wake-on-connect can be + * configured after router unplug. No need to unconfigure upstream port + * since its router is unplugged. + */ + up = tb_upstream_port(sw); + down = up->remote; + if (tb_switch_is_usb4(down->sw)) + usb4_port_unconfigure(down); + else + tb_lc_unconfigure_port(down); + + if (sw->is_unplugged) + return; + up = tb_upstream_port(sw); if (tb_switch_is_usb4(up->sw)) usb4_port_unconfigure(up); else tb_lc_unconfigure_port(up); - - down = up->remote; - if (tb_switch_is_usb4(down->sw)) - usb4_port_unconfigure(down); - else - tb_lc_unconfigure_port(down); } static void tb_switch_credits_init(struct tb_switch *sw) @@ -3441,7 +3448,26 @@ static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags) return tb_lc_set_wake(sw, flags); } -int tb_switch_resume(struct tb_switch *sw) +static void tb_switch_check_wakes(struct tb_switch *sw) +{ + if (device_may_wakeup(&sw->dev)) { + if (tb_switch_is_usb4(sw)) + usb4_switch_check_wakes(sw); + } +} + +/** + * tb_switch_resume() - Resume a switch after sleep + * @sw: Switch to resume + * @runtime: Is this resume from runtime suspend or system sleep + * + * Resumes and re-enumerates router (and all its children), if still plugged + * after suspend. Don't enumerate device router whose UID was changed during + * suspend. If this is resume from system sleep, notifies PM core about the + * wakes occurred during suspend. Disables all wakes, except USB4 wake of + * upstream port for USB4 routers that shall be always enabled. + */ +int tb_switch_resume(struct tb_switch *sw, bool runtime) { struct tb_port *port; int err; @@ -3490,6 +3516,9 @@ int tb_switch_resume(struct tb_switch *sw) if (err) return err; + if (!runtime) + tb_switch_check_wakes(sw); + /* Disable wakes */ tb_switch_set_wake(sw, 0); @@ -3519,7 +3548,8 @@ int tb_switch_resume(struct tb_switch *sw) */ if (tb_port_unlock(port)) tb_port_warn(port, "failed to unlock port\n"); - if (port->remote && tb_switch_resume(port->remote->sw)) { + if (port->remote && + tb_switch_resume(port->remote->sw, runtime)) { tb_port_warn(port, "lost during suspend, disconnecting\n"); tb_sw_set_unplugged(port->remote->sw); diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index c5ce7a694b27..3e44c78ac409 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1801,6 +1801,12 @@ static struct tb_port *tb_find_dp_out(struct tb *tb, struct tb_port *in) continue; } + /* Needs to be on different routers */ + if (in->sw == port->sw) { + tb_port_dbg(port, "skipping DP OUT on same router\n"); + continue; + } + tb_port_dbg(port, "DP OUT available\n"); /* @@ -2936,7 +2942,7 @@ static int tb_resume_noirq(struct tb *tb) if (!tb_switch_is_usb4(tb->root_switch)) tb_switch_reset(tb->root_switch); - tb_switch_resume(tb->root_switch); + tb_switch_resume(tb->root_switch, false); tb_free_invalid_tunnels(tb); tb_free_unplugged_children(tb->root_switch); tb_restore_children(tb->root_switch); @@ -3062,7 +3068,7 @@ static int tb_runtime_resume(struct tb *tb) struct tb_tunnel *tunnel, *n; mutex_lock(&tb->lock); - tb_switch_resume(tb->root_switch); + tb_switch_resume(tb->root_switch, true); tb_free_invalid_tunnels(tb); tb_restore_children(tb->root_switch); list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index feed8ecaf712..18aae4ccaed5 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -827,7 +827,7 @@ int tb_switch_configuration_valid(struct tb_switch *sw); int tb_switch_add(struct tb_switch *sw); void tb_switch_remove(struct tb_switch *sw); void tb_switch_suspend(struct tb_switch *sw, bool runtime); -int tb_switch_resume(struct tb_switch *sw); +int tb_switch_resume(struct tb_switch *sw, bool runtime); int tb_switch_reset(struct tb_switch *sw); int tb_switch_wait_for_bit(struct tb_switch *sw, u32 offset, u32 bit, u32 value, int timeout_msec); @@ -1288,6 +1288,7 @@ static inline bool tb_switch_is_usb4(const struct tb_switch *sw) return usb4_switch_version(sw) > 0; } +void usb4_switch_check_wakes(struct tb_switch *sw); int usb4_switch_setup(struct tb_switch *sw); int usb4_switch_configuration_valid(struct tb_switch *sw); int usb4_switch_read_uid(struct tb_switch *sw, u64 *uid); diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 9860b49d7a2b..78b06e922fda 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -155,7 +155,13 @@ static inline int usb4_switch_op_data(struct tb_switch *sw, u16 opcode, tx_dwords, rx_data, rx_dwords); } -static void usb4_switch_check_wakes(struct tb_switch *sw) +/** + * usb4_switch_check_wakes() - Check for wakes and notify PM core about them + * @sw: Router whose wakes to check + * + * Checks wakes occurred during suspend and notify the PM core about them. + */ +void usb4_switch_check_wakes(struct tb_switch *sw) { bool wakeup_usb4 = false; struct usb4_port *usb4; @@ -163,9 +169,6 @@ static void usb4_switch_check_wakes(struct tb_switch *sw) bool wakeup = false; u32 val; - if (!device_may_wakeup(&sw->dev)) - return; - if (tb_route(sw)) { if (tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_6, 1)) return; @@ -244,8 +247,6 @@ int usb4_switch_setup(struct tb_switch *sw) u32 val = 0; int ret; - usb4_switch_check_wakes(sw); - if (!tb_route(sw)) return 0; diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index a3acbf0f5da1..1300c92b8702 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -356,9 +356,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, long rate; int ret; + clk_disable_unprepare(d->clk); rate = clk_round_rate(d->clk, newrate); - if (rate > 0 && p->uartclk != rate) { - clk_disable_unprepare(d->clk); + if (rate > 0) { /* * Note that any clock-notifer worker will block in * serial8250_update_uartclk() until we are done. @@ -366,8 +366,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, ret = clk_set_rate(d->clk, newrate); if (!ret) p->uartclk = rate; - clk_prepare_enable(d->clk); } + clk_prepare_enable(d->clk); dw8250_do_set_termios(p, termios, old); } diff --git a/drivers/tty/serial/8250/8250_lpc18xx.c b/drivers/tty/serial/8250/8250_lpc18xx.c index 7984ee05af1d..47e1a056a60c 100644 --- a/drivers/tty/serial/8250/8250_lpc18xx.c +++ b/drivers/tty/serial/8250/8250_lpc18xx.c @@ -151,7 +151,7 @@ static int lpc18xx_serial_probe(struct platform_device *pdev) ret = uart_read_port_properties(&uart.port); if (ret) - return ret; + goto dis_uart_clk; uart.port.iotype = UPIO_MEM32; uart.port.regshift = 2; diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 0d35c77fad9e..e2e4f99f9d34 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5010,12 +5010,6 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATRO_B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_bt_2_115200 }, - { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATTRO_A, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_b0_bt_2_115200 }, - { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATTRO_B, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_b0_bt_2_115200 }, { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_OCTO_A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_bt_4_460800 }, diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 4749331fe618..1e8853eae504 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1086,11 +1086,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port, static irqreturn_t mxs_auart_irq_handle(int irq, void *context) { - u32 istat; + u32 istat, stat; struct mxs_auart_port *s = context; u32 mctrl_temp = s->mctrl_prev; - u32 stat = mxs_read(s, REG_STAT); + uart_port_lock(&s->port); + + stat = mxs_read(s, REG_STAT); istat = mxs_read(s, REG_INTR); /* ack irq */ @@ -1126,6 +1128,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context) istat &= ~AUART_INTR_TXIS; } + uart_port_unlock(&s->port); + return IRQ_HANDLED; } diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c index 05d97e89511e..92195f984de1 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -210,7 +210,6 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) { struct tty_port *port; unsigned char ch, r1, drop, flag; - int loops = 0; /* Sanity check, make sure the old bug is no longer happening */ if (uap->port.state == NULL) { @@ -291,24 +290,11 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) if (r1 & Rx_OVR) tty_insert_flip_char(port, 0, TTY_OVERRUN); next_char: - /* We can get stuck in an infinite loop getting char 0 when the - * line is in a wrong HW state, we break that here. - * When that happens, I disable the receive side of the driver. - * Note that what I've been experiencing is a real irq loop where - * I'm getting flooded regardless of the actual port speed. - * Something strange is going on with the HW - */ - if ((++loops) > 1000) - goto flood; ch = read_zsreg(uap, R0); if (!(ch & Rx_CH_AV)) break; } - return true; - flood: - pmz_interrupt_control(uap, 0); - pmz_error("pmz: rx irq flood !\n"); return true; } diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h index c74c548f0db6..b6c38d2edfd4 100644 --- a/drivers/tty/serial/serial_base.h +++ b/drivers/tty/serial/serial_base.h @@ -22,6 +22,7 @@ struct serial_ctrl_device { struct serial_port_device { struct device dev; struct uart_port *port; + unsigned int tx_enabled:1; }; int serial_base_ctrl_init(void); @@ -30,6 +31,9 @@ void serial_base_ctrl_exit(void); int serial_base_port_init(void); void serial_base_port_exit(void); +void serial_base_port_startup(struct uart_port *port); +void serial_base_port_shutdown(struct uart_port *port); + int serial_base_driver_register(struct device_driver *driver); void serial_base_driver_unregister(struct device_driver *driver); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index ff85ebd3a007..c476d884356d 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -156,7 +156,7 @@ static void __uart_start(struct uart_state *state) * enabled, serial_port_runtime_resume() calls start_tx() again * after enabling the device. */ - if (pm_runtime_active(&port_dev->dev)) + if (!pm_runtime_enabled(port->dev) || pm_runtime_active(&port_dev->dev)) port->ops->start_tx(port); pm_runtime_mark_last_busy(&port_dev->dev); pm_runtime_put_autosuspend(&port_dev->dev); @@ -323,16 +323,26 @@ static int uart_startup(struct tty_struct *tty, struct uart_state *state, bool init_hw) { struct tty_port *port = &state->port; + struct uart_port *uport; int retval; if (tty_port_initialized(port)) - return 0; + goto out_base_port_startup; retval = uart_port_startup(tty, state, init_hw); - if (retval) + if (retval) { set_bit(TTY_IO_ERROR, &tty->flags); + return retval; + } - return retval; +out_base_port_startup: + uport = uart_port_check(state); + if (!uport) + return -EIO; + + serial_base_port_startup(uport); + + return 0; } /* @@ -355,6 +365,9 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) if (tty) set_bit(TTY_IO_ERROR, &tty->flags); + if (uport) + serial_base_port_shutdown(uport); + if (tty_port_initialized(port)) { tty_port_set_initialized(port, false); @@ -1775,6 +1788,7 @@ static void uart_tty_port_shutdown(struct tty_port *port) uport->ops->stop_rx(uport); uart_port_unlock_irq(uport); + serial_base_port_shutdown(uport); uart_port_shutdown(port); /* @@ -1788,6 +1802,7 @@ static void uart_tty_port_shutdown(struct tty_port *port) * Free the transmit buffer. */ uart_port_lock_irq(uport); + uart_circ_clear(&state->xmit); buf = state->xmit.buf; state->xmit.buf = NULL; uart_port_unlock_irq(uport); diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c index 22b9eeb23e68..7e3a1c7b097c 100644 --- a/drivers/tty/serial/serial_port.c +++ b/drivers/tty/serial/serial_port.c @@ -39,8 +39,12 @@ static int serial_port_runtime_resume(struct device *dev) /* Flush any pending TX for the port */ uart_port_lock_irqsave(port, &flags); + if (!port_dev->tx_enabled) + goto unlock; if (__serial_port_busy(port)) port->ops->start_tx(port); + +unlock: uart_port_unlock_irqrestore(port, flags); out: @@ -60,6 +64,11 @@ static int serial_port_runtime_suspend(struct device *dev) return 0; uart_port_lock_irqsave(port, &flags); + if (!port_dev->tx_enabled) { + uart_port_unlock_irqrestore(port, flags); + return 0; + } + busy = __serial_port_busy(port); if (busy) port->ops->start_tx(port); @@ -71,6 +80,31 @@ static int serial_port_runtime_suspend(struct device *dev) return busy ? -EBUSY : 0; } +static void serial_base_port_set_tx(struct uart_port *port, + struct serial_port_device *port_dev, + bool enabled) +{ + unsigned long flags; + + uart_port_lock_irqsave(port, &flags); + port_dev->tx_enabled = enabled; + uart_port_unlock_irqrestore(port, flags); +} + +void serial_base_port_startup(struct uart_port *port) +{ + struct serial_port_device *port_dev = port->port_dev; + + serial_base_port_set_tx(port, port_dev, true); +} + +void serial_base_port_shutdown(struct uart_port *port) +{ + struct serial_port_device *port_dev = port->port_dev; + + serial_base_port_set_tx(port, port_dev, false); +} + static DEFINE_RUNTIME_DEV_PM_OPS(serial_port_pm, serial_port_runtime_suspend, serial_port_runtime_resume, NULL); diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 58d169e5c1db..4fa5a03ebac0 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -861,6 +861,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; u32 sr; unsigned int size; + irqreturn_t ret = IRQ_NONE; sr = readl_relaxed(port->membase + ofs->isr); @@ -869,11 +870,14 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) (sr & USART_SR_TC)) { stm32_usart_tc_interrupt_disable(port); stm32_usart_rs485_rts_disable(port); + ret = IRQ_HANDLED; } - if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) + if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) { writel_relaxed(USART_ICR_RTOCF, port->membase + ofs->icr); + ret = IRQ_HANDLED; + } if ((sr & USART_SR_WUF) && ofs->icr != UNDEF_REG) { /* Clear wake up flag and disable wake up interrupt */ @@ -882,6 +886,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_WUFIE); if (irqd_is_wakeup_set(irq_get_irq_data(port->irq))) pm_wakeup_event(tport->tty->dev, 0); + ret = IRQ_HANDLED; } /* @@ -896,6 +901,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) uart_unlock_and_check_sysrq(port); if (size) tty_flip_buffer_push(tport); + ret = IRQ_HANDLED; } } @@ -903,6 +909,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) uart_port_lock(port); stm32_usart_transmit_chars(port); uart_port_unlock(port); + ret = IRQ_HANDLED; } /* Receiver timeout irq for DMA RX */ @@ -912,9 +919,10 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) uart_unlock_and_check_sysrq(port); if (size) tty_flip_buffer_push(tport); + ret = IRQ_HANDLED; } - return IRQ_HANDLED; + return ret; } static void stm32_usart_set_mctrl(struct uart_port *port, unsigned int mctrl) @@ -1084,6 +1092,7 @@ static int stm32_usart_startup(struct uart_port *port) val |= USART_CR2_SWAP; writel_relaxed(val, port->membase + ofs->cr2); } + stm32_port->throttled = false; /* RX FIFO Flush */ if (ofs->rqr != UNDEF_REG) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 8db81f1a12d5..768bf87cd80d 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -94,7 +94,7 @@ void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds) val = ufshcd_readl(hba, REG_UFS_MCQ_CFG); val &= ~MCQ_CFG_MAC_MASK; - val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds); + val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds - 1); ufshcd_writel(hba, val, REG_UFS_MCQ_CFG); } EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index e30fd125988d..a0f8e930167d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3217,7 +3217,9 @@ retry: /* MCQ mode */ if (is_mcq_enabled(hba)) { - err = ufshcd_clear_cmd(hba, lrbp->task_tag); + /* successfully cleared the command, retry if needed */ + if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0) + err = -EAGAIN; hba->dev_cmd.complete = NULL; return err; } @@ -9791,7 +9793,10 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) /* UFS device & link must be active before we enter in this function */ if (!ufshcd_is_ufs_dev_active(hba) || !ufshcd_is_link_active(hba)) { - ret = -EINVAL; + /* Wait err handler finish or trigger err recovery */ + if (!ufshcd_eh_in_progress(hba)) + ufshcd_force_error_recovery(hba); + ret = -EBUSY; goto enable_scaling; } diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 8d68bd21ae73..7a00004bfd03 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -47,7 +47,7 @@ enum { TSTBUS_MAX, }; -#define QCOM_UFS_MAX_GEAR 4 +#define QCOM_UFS_MAX_GEAR 5 #define QCOM_UFS_MAX_LANE 2 enum { @@ -67,26 +67,32 @@ static const struct __ufs_qcom_bw_table { [MODE_PWM][UFS_PWM_G2][UFS_LANE_1] = { 1844, 1000 }, [MODE_PWM][UFS_PWM_G3][UFS_LANE_1] = { 3688, 1000 }, [MODE_PWM][UFS_PWM_G4][UFS_LANE_1] = { 7376, 1000 }, + [MODE_PWM][UFS_PWM_G5][UFS_LANE_1] = { 14752, 1000 }, [MODE_PWM][UFS_PWM_G1][UFS_LANE_2] = { 1844, 1000 }, [MODE_PWM][UFS_PWM_G2][UFS_LANE_2] = { 3688, 1000 }, [MODE_PWM][UFS_PWM_G3][UFS_LANE_2] = { 7376, 1000 }, [MODE_PWM][UFS_PWM_G4][UFS_LANE_2] = { 14752, 1000 }, + [MODE_PWM][UFS_PWM_G5][UFS_LANE_2] = { 29504, 1000 }, [MODE_HS_RA][UFS_HS_G1][UFS_LANE_1] = { 127796, 1000 }, [MODE_HS_RA][UFS_HS_G2][UFS_LANE_1] = { 255591, 1000 }, [MODE_HS_RA][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 }, [MODE_HS_RA][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 }, + [MODE_HS_RA][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 }, [MODE_HS_RA][UFS_HS_G1][UFS_LANE_2] = { 255591, 1000 }, [MODE_HS_RA][UFS_HS_G2][UFS_LANE_2] = { 511181, 1000 }, [MODE_HS_RA][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 }, [MODE_HS_RA][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 }, + [MODE_HS_RA][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 }, [MODE_HS_RB][UFS_HS_G1][UFS_LANE_1] = { 149422, 1000 }, [MODE_HS_RB][UFS_HS_G2][UFS_LANE_1] = { 298189, 1000 }, [MODE_HS_RB][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 }, [MODE_HS_RB][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 }, + [MODE_HS_RB][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 }, [MODE_HS_RB][UFS_HS_G1][UFS_LANE_2] = { 298189, 1000 }, [MODE_HS_RB][UFS_HS_G2][UFS_LANE_2] = { 596378, 1000 }, [MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 }, [MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 }, + [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 }, [MODE_MAX][0][0] = { 7643136, 307200 }, }; @@ -1210,8 +1216,10 @@ static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up) list_for_each_entry(clki, head, list) { if (!IS_ERR_OR_NULL(clki->clk) && - !strcmp(clki->name, "core_clk_unipro")) { - if (is_scale_up) + !strcmp(clki->name, "core_clk_unipro")) { + if (!clki->max_freq) + cycles_in_1us = 150; /* default for backwards compatibility */ + else if (is_scale_up) cycles_in_1us = ceil(clki->max_freq, (1000 * 1000)); else cycles_in_1us = ceil(clk_get_rate(clki->clk), (1000 * 1000)); diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index bb77de6fa067..009158fef2a8 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -792,7 +792,7 @@ static int uio_mmap_dma_coherent(struct vm_area_struct *vma) */ vma->vm_pgoff = 0; - addr = (void *)mem->addr; + addr = (void *)(uintptr_t)mem->addr; ret = dma_mmap_coherent(mem->dma_device, vma, addr, diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index d5f9384df125..13cc35ab5d29 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -60,7 +60,7 @@ static int uio_dmem_genirq_open(struct uio_info *info, struct inode *inode) addr = dma_alloc_coherent(&priv->pdev->dev, uiomem->size, &uiomem->dma_addr, GFP_KERNEL); - uiomem->addr = addr ? (phys_addr_t) addr : DMEM_MAP_ERROR; + uiomem->addr = addr ? (uintptr_t) addr : DMEM_MAP_ERROR; ++uiomem; } priv->refcnt++; @@ -89,7 +89,7 @@ static int uio_dmem_genirq_release(struct uio_info *info, struct inode *inode) break; if (uiomem->addr) { dma_free_coherent(uiomem->dma_device, uiomem->size, - (void *) uiomem->addr, + (void *) (uintptr_t) uiomem->addr, uiomem->dma_addr); } uiomem->addr = DMEM_MAP_ERROR; diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 20d9762331bd..6be3462b109f 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -181,12 +181,14 @@ hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata) { if (pdata->send_gpadl.gpadl_handle) { vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl); - vfree(pdata->send_buf); + if (!pdata->send_gpadl.decrypted) + vfree(pdata->send_buf); } if (pdata->recv_gpadl.gpadl_handle) { vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl); - vfree(pdata->recv_buf); + if (!pdata->recv_gpadl.decrypted) + vfree(pdata->recv_buf); } } @@ -295,7 +297,8 @@ hv_uio_probe(struct hv_device *dev, ret = vmbus_establish_gpadl(channel, pdata->recv_buf, RECV_BUFFER_SIZE, &pdata->recv_gpadl); if (ret) { - vfree(pdata->recv_buf); + if (!pdata->recv_gpadl.decrypted) + vfree(pdata->recv_buf); goto fail_close; } @@ -317,7 +320,8 @@ hv_uio_probe(struct hv_device *dev, ret = vmbus_establish_gpadl(channel, pdata->send_buf, SEND_BUFFER_SIZE, &pdata->send_gpadl); if (ret) { - vfree(pdata->send_buf); + if (!pdata->send_gpadl.decrypted) + vfree(pdata->send_buf); goto fail_close; } diff --git a/drivers/uio/uio_pruss.c b/drivers/uio/uio_pruss.c index 72b33f7d4c40..f67881cba645 100644 --- a/drivers/uio/uio_pruss.c +++ b/drivers/uio/uio_pruss.c @@ -191,7 +191,7 @@ static int pruss_probe(struct platform_device *pdev) p->mem[1].size = sram_pool_sz; p->mem[1].memtype = UIO_MEM_PHYS; - p->mem[2].addr = (phys_addr_t) gdev->ddr_vaddr; + p->mem[2].addr = (uintptr_t) gdev->ddr_vaddr; p->mem[2].dma_addr = gdev->ddr_paddr; p->mem[2].size = extram_pool_sz; p->mem[2].memtype = UIO_MEM_DMA_COHERENT; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3ee8455585b6..9446660e231b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -130,7 +130,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 -static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, @@ -720,14 +719,14 @@ static void kick_hub_wq(struct usb_hub *hub) */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); - kref_get(&hub->kref); + hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ usb_autopm_put_interface_async(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) @@ -1095,7 +1094,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } - kref_get(&hub->kref); + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1343,7 +1342,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } - kref_put(&hub->kref, hub_release); + hub_put(hub); } /* Implement the continuations for the delays above */ @@ -1759,6 +1758,16 @@ static void hub_release(struct kref *kref) kfree(hub); } +void hub_get(struct usb_hub *hub) +{ + kref_get(&hub->kref); +} + +void hub_put(struct usb_hub *hub) +{ + kref_put(&hub->kref, hub_release); +} + static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) @@ -1807,7 +1816,7 @@ static void hub_disconnect(struct usb_interface *intf) onboard_hub_destroy_pdevs(&hub->onboard_hub_devs); - kref_put(&hub->kref, hub_release); + hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) @@ -5934,7 +5943,7 @@ out_hdev_lock: /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); kcov_remote_stop(); } diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 43ce21c96a51..183b69dc2955 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -129,6 +129,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub, extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set); extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev); +extern void hub_get(struct usb_hub *hub); +extern void hub_put(struct usb_hub *hub); extern int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected); extern int usb_clear_port_feature(struct usb_device *hdev, diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 5b5e613a11e5..0e1262a077ae 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -56,11 +56,22 @@ static ssize_t disable_show(struct device *dev, u16 portstatus, unused; bool disabled; int rc; + struct kernfs_node *kn; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -70,9 +81,13 @@ static ssize_t disable_show(struct device *dev, usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); if (rc) return rc; @@ -90,15 +105,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, int port1 = port_dev->portnum; bool disabled; int rc; + struct kernfs_node *kn; rc = kstrtobool(buf, &disabled); if (rc) return rc; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -119,9 +145,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, if (!rc) rc = count; -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); return rc; } @@ -419,8 +449,10 @@ static void usb_port_shutdown(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); - if (port_dev->child) + if (port_dev->child) { usb_disable_usb2_hardware_lpm(port_dev->child); + usb_unlocked_disable_lpm(port_dev->child); + } } static const struct dev_pm_ops usb_port_pm_ops = { diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index f98263e21c2a..d83231d6736a 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -1217,14 +1217,24 @@ static ssize_t interface_authorized_store(struct device *dev, { struct usb_interface *intf = to_usb_interface(dev); bool val; + struct kernfs_node *kn; if (kstrtobool(buf, &val) != 0) return -EINVAL; - if (val) + if (val) { usb_authorize_interface(intf); - else - usb_deauthorize_interface(intf); + } else { + /* + * Prevent deadlock if another process is concurrently + * trying to unregister intf. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (kn) { + usb_deauthorize_interface(intf); + sysfs_unbreak_active_protection(kn); + } + } return count; } diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index c92a1da46a01..a141f83aba0c 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -729,8 +729,14 @@ struct dwc2_dregs_backup { * struct dwc2_hregs_backup - Holds host registers state before * entering partial power down * @hcfg: Backup of HCFG register + * @hflbaddr: Backup of HFLBADDR register * @haintmsk: Backup of HAINTMSK register + * @hcchar: Backup of HCCHAR register + * @hcsplt: Backup of HCSPLT register * @hcintmsk: Backup of HCINTMSK register + * @hctsiz: Backup of HCTSIZ register + * @hdma: Backup of HCDMA register + * @hcdmab: Backup of HCDMAB register * @hprt0: Backup of HPTR0 register * @hfir: Backup of HFIR register * @hptxfsiz: Backup of HPTXFSIZ register @@ -738,8 +744,14 @@ struct dwc2_dregs_backup { */ struct dwc2_hregs_backup { u32 hcfg; + u32 hflbaddr; u32 haintmsk; + u32 hcchar[MAX_EPS_CHANNELS]; + u32 hcsplt[MAX_EPS_CHANNELS]; u32 hcintmsk[MAX_EPS_CHANNELS]; + u32 hctsiz[MAX_EPS_CHANNELS]; + u32 hcidma[MAX_EPS_CHANNELS]; + u32 hcidmab[MAX_EPS_CHANNELS]; u32 hprt0; u32 hfir; u32 hptxfsiz; @@ -1086,6 +1098,7 @@ struct dwc2_hsotg { bool needs_byte_swap; /* DWC OTG HW Release versions */ +#define DWC2_CORE_REV_4_30a 0x4f54430a #define DWC2_CORE_REV_2_71a 0x4f54271a #define DWC2_CORE_REV_2_72a 0x4f54272a #define DWC2_CORE_REV_2_80a 0x4f54280a @@ -1323,6 +1336,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg); int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg); void dwc2_enable_acg(struct dwc2_hsotg *hsotg); +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup); /* This function should be called on every hardware interrupt. */ irqreturn_t dwc2_handle_common_intr(int irq, void *dev); diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index 158ede753854..26d752a4c3ca 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -297,7 +297,8 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } @@ -322,10 +323,11 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) * @hsotg: Programming view of DWC_otg controller * */ -static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup) { u32 glpmcfg; - u32 i = 0; + u32 pcgctl; + u32 dctl; if (hsotg->lx_state != DWC2_L1) { dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n"); @@ -334,37 +336,57 @@ static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) glpmcfg = dwc2_readl(hsotg, GLPMCFG); if (dwc2_is_device_mode(hsotg)) { - dev_dbg(hsotg->dev, "Exit from L1 state\n"); + dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup); glpmcfg &= ~GLPMCFG_ENBLSLPM; - glpmcfg &= ~GLPMCFG_HIRD_THRES_EN; + glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK; dwc2_writel(hsotg, glpmcfg, GLPMCFG); - do { - glpmcfg = dwc2_readl(hsotg, GLPMCFG); + pcgctl = dwc2_readl(hsotg, PCGCTL); + pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING; + dwc2_writel(hsotg, pcgctl, PCGCTL); - if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK | - GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS))) - break; + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_ENBESL) { + glpmcfg |= GLPMCFG_RSTRSLPSTS; + dwc2_writel(hsotg, glpmcfg, GLPMCFG); + } - udelay(1); - } while (++i < 200); + if (remotewakeup) { + if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", __func__); + goto fail; + return; + } - if (i == 200) { - dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n"); + dctl = dwc2_readl(hsotg, DCTL); + dctl |= DCTL_RMTWKUPSIG; + dwc2_writel(hsotg, dctl, DCTL); + + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__); + goto fail; + return; + } + } + + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS || + glpmcfg & GLPMCFG_L1RESUMEOK) { + goto fail; return; } - dwc2_gadget_init_lpm(hsotg); + + /* Inform gadget to exit from L1 */ + call_gadget(hsotg, resume); + /* Change to L0 state */ + hsotg->lx_state = DWC2_L0; + hsotg->bus_suspended = false; +fail: dwc2_gadget_init_lpm(hsotg); } else { /* TODO */ dev_err(hsotg->dev, "Host side LPM is not supported.\n"); return; } - - /* Change to L0 state */ - hsotg->lx_state = DWC2_L0; - - /* Inform gadget to exit from L1 */ - call_gadget(hsotg, resume); } /* @@ -385,7 +407,7 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state); if (hsotg->lx_state == DWC2_L1) { - dwc2_wakeup_from_lpm_l1(hsotg); + dwc2_wakeup_from_lpm_l1(hsotg, false); return; } @@ -408,7 +430,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } else { /* Change to L0 state */ @@ -425,7 +448,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) } if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_host_exit_clock_gating(hsotg, 1); /* diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index b517a7216de2..b2f6da5b65cc 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1415,6 +1415,10 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, ep->name, req, req->length, req->buf, req->no_interrupt, req->zero, req->short_not_ok); + if (hs->lx_state == DWC2_L1) { + dwc2_wakeup_from_lpm_l1(hs, true); + } + /* Prevent new request submission when controller is suspended */ if (hs->lx_state != DWC2_L0) { dev_dbg(hs->dev, "%s: submit request only in active state\n", @@ -3727,6 +3731,12 @@ irq_retry: if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2) dwc2_exit_partial_power_down(hsotg, 0, true); + /* Exit gadget mode clock gating. */ + if (hsotg->params.power_down == + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) + dwc2_gadget_exit_clock_gating(hsotg, 0); + hsotg->lx_state = DWC2_L0; } diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 35c7a4df8e71..dd5b1c5691e1 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -2701,8 +2701,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry); - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the periodic ready schedule to the @@ -2735,8 +2738,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the non-periodic inactive schedule to the @@ -4143,6 +4149,8 @@ void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd, urb->actual_length); if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { + if (!hsotg->params.dma_desc_enable) + urb->start_frame = qtd->qh->start_active_frame; urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb); for (i = 0; i < urb->number_of_packets; ++i) { urb->iso_frame_desc[i].actual_length = @@ -4649,7 +4657,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, } if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else @@ -5406,9 +5414,16 @@ int dwc2_backup_host_registers(struct dwc2_hsotg *hsotg) /* Backup Host regs */ hr = &hsotg->hr_backup; hr->hcfg = dwc2_readl(hsotg, HCFG); + hr->hflbaddr = dwc2_readl(hsotg, HFLBADDR); hr->haintmsk = dwc2_readl(hsotg, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i)); + hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i)); hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i)); + hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i)); + hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i)); + hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i)); + } hr->hprt0 = dwc2_read_hprt0(hsotg); hr->hfir = dwc2_readl(hsotg, HFIR); @@ -5442,10 +5457,17 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg) hr->valid = false; dwc2_writel(hsotg, hr->hcfg, HCFG); + dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR); dwc2_writel(hsotg, hr->haintmsk, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i)); + dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i)); dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i)); + dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i)); + dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i)); + dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i)); + } dwc2_writel(hsotg, hr->hprt0, HPRT0); dwc2_writel(hsotg, hr->hfir, HFIR); @@ -5610,10 +5632,12 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, dwc2_writel(hsotg, hr->hcfg, HCFG); /* De-assert Wakeup Logic */ - gpwrdn = dwc2_readl(hsotg, GPWRDN); - gpwrdn &= ~GPWRDN_PMUACTV; - dwc2_writel(hsotg, gpwrdn, GPWRDN); - udelay(10); + if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } hprt0 = hr->hprt0; hprt0 |= HPRT0_PWR; @@ -5638,6 +5662,13 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, hprt0 |= HPRT0_RES; dwc2_writel(hsotg, hprt0, HPRT0); + /* De-assert Wakeup Logic */ + if ((rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } /* Wait for Resume time and then program HPRT again */ mdelay(100); hprt0 &= ~HPRT0_RES; diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c index 6b4d825e97a2..994a78ad084b 100644 --- a/drivers/usb/dwc2/hcd_ddma.c +++ b/drivers/usb/dwc2/hcd_ddma.c @@ -559,7 +559,7 @@ static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg, idx = qh->td_last; inc = qh->host_interval; hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg); - cur_idx = dwc2_frame_list_idx(hsotg->frame_number); + cur_idx = idx; next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed); /* @@ -866,20 +866,27 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, { struct dwc2_dma_desc *dma_desc; struct dwc2_hcd_iso_packet_desc *frame_desc; + u16 frame_desc_idx; + struct urb *usb_urb; u16 remain = 0; int rc = 0; if (!qtd->urb) return -EINVAL; + usb_urb = qtd->urb->priv; + dma_sync_single_for_cpu(hsotg->dev, qh->desc_list_dma + (idx * sizeof(struct dwc2_dma_desc)), sizeof(struct dwc2_dma_desc), DMA_FROM_DEVICE); dma_desc = &qh->desc_list[idx]; + frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1); - frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last]; + frame_desc = &qtd->urb->iso_descs[frame_desc_idx]; + if (idx == qtd->isoc_td_first) + usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg); dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset); if (chan->ep_is_in) remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >> @@ -900,7 +907,7 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, frame_desc->status = 0; } - if (++qtd->isoc_frame_index == qtd->urb->packet_count) { + if (++qtd->isoc_frame_index == usb_urb->number_of_packets) { /* * urb->status is not used for isoc transfers here. The * individual frame_desc status are used instead. @@ -1005,11 +1012,11 @@ static void dwc2_complete_isoc_xfer_ddma(struct dwc2_hsotg *hsotg, return; idx = dwc2_desclist_idx_inc(idx, qh->host_interval, chan->speed); - if (!rc) + if (rc == 0) continue; - if (rc == DWC2_CMPL_DONE) - break; + if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP) + goto stop_scan; /* rc == DWC2_CMPL_STOP */ diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h index 13abdd5f6752..12f8c7f86dc9 100644 --- a/drivers/usb/dwc2/hw.h +++ b/drivers/usb/dwc2/hw.h @@ -698,7 +698,7 @@ #define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) #define TXSTS_QTOP_TOKEN_SHIFT 25 #define TXSTS_QTOP_TERMINATE BIT(24) -#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_MASK (0x7f << 16) #define TXSTS_QSPCAVAIL_SHIFT 16 #define TXSTS_FSPCAVAIL_MASK (0xffff << 0) #define TXSTS_FSPCAVAIL_SHIFT 0 diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index b1d48019e944..7b84416dfc2b 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -331,7 +331,7 @@ static void dwc2_driver_remove(struct platform_device *dev) /* Exit clock gating when driver is removed. */ if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 3e55838c0001..31684cdaaae3 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1519,6 +1519,8 @@ static void dwc3_get_properties(struct dwc3 *dwc) else dwc->sysdev = dwc->dev; + dwc->sys_wakeup = device_may_wakeup(dwc->sysdev); + ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name); if (ret >= 0) { dwc->usb_psy = power_supply_get_by_name(usb_psy_name); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index c07edfc954f7..7e80dd3d466b 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array { * 3 - Reserved * @dis_metastability_quirk: set to disable metastability quirk. * @dis_split_quirk: set to disable split boundary. + * @sys_wakeup: set if the device may do system wakeup. * @wakeup_configured: set if the device is configured for remote wakeup. * @suspended: set to track suspend event due to U3/L2. * @imod_interval: set the interrupt moderation interval in 250ns @@ -1357,6 +1358,7 @@ struct dwc3 { unsigned dis_split_quirk:1; unsigned async_callbacks:1; + unsigned sys_wakeup:1; unsigned wakeup_configured:1; unsigned suspended:1; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 39564e17f3b0..497deed38c0c 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -51,7 +51,6 @@ #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 #define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e -#define PCI_DEVICE_ID_INTEL_ARLH 0x7ec1 #define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a @@ -423,7 +422,6 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) }, - { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) }, diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 72bb722da2f2..d96ffbe52039 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -226,7 +226,8 @@ void dwc3_ep0_stall_and_restart(struct dwc3 *dwc) /* reinitialize physical ep1 */ dep = dwc->eps[1]; - dep->flags = DWC3_EP_ENABLED; + dep->flags &= DWC3_EP_RESOURCE_ALLOCATED; + dep->flags |= DWC3_EP_ENABLED; /* stall is always issued on EP0 */ dep = dwc->eps[0]; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 40c52dbc28d3..4df2661f6675 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2955,6 +2955,9 @@ static int dwc3_gadget_start(struct usb_gadget *g, dwc->gadget_driver = driver; spin_unlock_irqrestore(&dwc->lock, flags); + if (dwc->sys_wakeup) + device_wakeup_enable(dwc->sysdev); + return 0; } @@ -2970,6 +2973,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g) struct dwc3 *dwc = gadget_to_dwc(g); unsigned long flags; + if (dwc->sys_wakeup) + device_wakeup_disable(dwc->sysdev); + spin_lock_irqsave(&dwc->lock, flags); dwc->gadget_driver = NULL; dwc->max_cfg_eps = 0; @@ -4651,6 +4657,10 @@ int dwc3_gadget_init(struct dwc3 *dwc) else dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed); + /* No system wakeup if no gadget driver bound */ + if (dwc->sys_wakeup) + device_wakeup_disable(dwc->sysdev); + return 0; err5: diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 5a5cb6ce9946..0204787df81d 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -173,6 +173,14 @@ int dwc3_host_init(struct dwc3 *dwc) goto err; } + if (dwc->sys_wakeup) { + /* Restore wakeup setting if switched from device */ + device_wakeup_enable(dwc->sysdev); + + /* Pass on wakeup setting to the new xhci platform device */ + device_init_wakeup(&xhci->dev, true); + } + return 0; err: platform_device_put(xhci); @@ -181,6 +189,9 @@ err: void dwc3_host_exit(struct dwc3 *dwc) { + if (dwc->sys_wakeup) + device_init_wakeup(&dwc->xhci->dev, false); + platform_device_unregister(dwc->xhci); dwc->xhci = NULL; } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index bffbc1dc651f..f855f1fc8e5e 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -46,6 +46,8 @@ #define FUNCTIONFS_MAGIC 0xa647361 /* Chosen by a honest dice roll ;) */ +#define DMABUF_ENQUEUE_TIMEOUT_MS 5000 + MODULE_IMPORT_NS(DMA_BUF); /* Reference counter handling */ @@ -1578,10 +1580,13 @@ static int ffs_dmabuf_transfer(struct file *file, struct ffs_dmabuf_priv *priv; struct ffs_dma_fence *fence; struct usb_request *usb_req; + enum dma_resv_usage resv_dir; struct dma_buf *dmabuf; + unsigned long timeout; struct ffs_ep *ep; bool cookie; u32 seqno; + long retl; int ret; if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK) @@ -1615,17 +1620,14 @@ static int ffs_dmabuf_transfer(struct file *file, goto err_attachment_put; /* Make sure we don't have writers */ - if (!dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_WRITE)) { - pr_vdebug("FFS WRITE fence is not signaled\n"); - ret = -EBUSY; - goto err_resv_unlock; - } - - /* If we're writing to the DMABUF, make sure we don't have readers */ - if (epfile->in && - !dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_READ)) { - pr_vdebug("FFS READ fence is not signaled\n"); - ret = -EBUSY; + timeout = nonblock ? 0 : msecs_to_jiffies(DMABUF_ENQUEUE_TIMEOUT_MS); + retl = dma_resv_wait_timeout(dmabuf->resv, + dma_resv_usage_rw(epfile->in), + true, timeout); + if (retl == 0) + retl = -EBUSY; + if (retl < 0) { + ret = (int)retl; goto err_resv_unlock; } @@ -1665,8 +1667,9 @@ static int ffs_dmabuf_transfer(struct file *file, dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops, &priv->lock, priv->context, seqno); - dma_resv_add_fence(dmabuf->resv, &fence->base, - dma_resv_usage_rw(epfile->in)); + resv_dir = epfile->in ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ; + + dma_resv_add_fence(dmabuf->resv, &fence->base, resv_dir); dma_resv_unlock(dmabuf->resv); /* Now that the dma_fence is in place, queue the transfer. */ diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 28f4e6552e84..0acc32ed9960 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -878,7 +878,7 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) if (alt > 1) goto fail; - if (ncm->port.in_ep->enabled) { + if (ncm->netdev) { DBG(cdev, "reset ncm\n"); ncm->netdev = NULL; gether_disconnect(&ncm->port); @@ -1367,7 +1367,7 @@ static void ncm_disable(struct usb_function *f) DBG(cdev, "ncm deactivated\n"); - if (ncm->port.in_ep->enabled) { + if (ncm->netdev) { ncm->netdev = NULL; gether_disconnect(&ncm->port); } diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 9d4150124fdb..b3a9d18a8dcd 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -292,7 +292,9 @@ int usb_ep_queue(struct usb_ep *ep, { int ret = 0; - if (WARN_ON_ONCE(!ep->enabled && ep->address)) { + if (!ep->enabled && ep->address) { + pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n", + ep->address, ep->name); ret = -ESHUTDOWN; goto out; } diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index e82d03224f94..3432ebfae978 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -868,7 +868,7 @@ fsl_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) { struct fsl_ep *ep = container_of(_ep, struct fsl_ep, ep); struct fsl_req *req = container_of(_req, struct fsl_req, req); - struct fsl_udc *udc; + struct fsl_udc *udc = ep->udc; unsigned long flags; int ret; @@ -878,7 +878,7 @@ fsl_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) dev_vdbg(&udc->gadget.dev, "%s, bad params\n", __func__); return -EINVAL; } - if (unlikely(!_ep || !ep->ep.desc)) { + if (unlikely(!ep->ep.desc)) { dev_vdbg(&udc->gadget.dev, "%s, bad ep\n", __func__); return -EINVAL; } @@ -887,7 +887,6 @@ fsl_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) return -EMSGSIZE; } - udc = ep->udc; if (!udc->driver || udc->gadget.speed == USB_SPEED_UNKNOWN) return -ESHUTDOWN; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 52278afea94b..575f0fd9c9f1 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3133,7 +3133,7 @@ static int xhci_handle_events(struct xhci_hcd *xhci, struct xhci_interrupter *ir irqreturn_t xhci_irq(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); - irqreturn_t ret = IRQ_NONE; + irqreturn_t ret = IRQ_HANDLED; u32 status; spin_lock(&xhci->lock); @@ -3141,12 +3141,13 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) status = readl(&xhci->op_regs->status); if (status == ~(u32)0) { xhci_hc_died(xhci); - ret = IRQ_HANDLED; goto out; } - if (!(status & STS_EINT)) + if (!(status & STS_EINT)) { + ret = IRQ_NONE; goto out; + } if (status & STS_HCE) { xhci_warn(xhci, "WARNING: Host Controller Error\n"); @@ -3156,7 +3157,6 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (status & STS_FATAL) { xhci_warn(xhci, "WARNING: Host System Error\n"); xhci_halt(xhci); - ret = IRQ_HANDLED; goto out; } @@ -3167,7 +3167,6 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) */ status |= STS_EINT; writel(status, &xhci->op_regs->status); - ret = IRQ_HANDLED; /* This is the handler of the primary interrupter */ xhci_handle_events(xhci, xhci->interrupters[0]); diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index 1740000d54c2..5762564b9d73 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -172,8 +172,7 @@ DECLARE_EVENT_CLASS(xhci_log_free_virt_dev, __field(void *, vdev) __field(unsigned long long, out_ctx) __field(unsigned long long, in_ctx) - __field(int, hcd_portnum) - __field(int, hw_portnum) + __field(int, slot_id) __field(u16, current_mel) ), @@ -181,13 +180,12 @@ DECLARE_EVENT_CLASS(xhci_log_free_virt_dev, __entry->vdev = vdev; __entry->in_ctx = (unsigned long long) vdev->in_ctx->dma; __entry->out_ctx = (unsigned long long) vdev->out_ctx->dma; - __entry->hcd_portnum = (int) vdev->rhub_port->hcd_portnum; - __entry->hw_portnum = (int) vdev->rhub_port->hw_portnum; + __entry->slot_id = (int) vdev->slot_id; __entry->current_mel = (u16) vdev->current_mel; ), - TP_printk("vdev %p ctx %llx | %llx hcd_portnum %d hw_portnum %d current_mel %d", - __entry->vdev, __entry->in_ctx, __entry->out_ctx, - __entry->hcd_portnum, __entry->hw_portnum, __entry->current_mel + TP_printk("vdev %p slot %d ctx %llx | %llx current_mel %d", + __entry->vdev, __entry->slot_id, __entry->in_ctx, + __entry->out_ctx, __entry->current_mel ) ); diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index c6101ed2d9d4..d8049275a023 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -78,7 +78,7 @@ static int onboard_hub_power_on(struct onboard_hub *hub) err = regulator_bulk_enable(hub->pdata->num_supplies, hub->supplies); if (err) { dev_err(hub->dev, "failed to enable supplies: %pe\n", ERR_PTR(err)); - return err; + goto disable_clk; } fsleep(hub->pdata->reset_us); @@ -87,6 +87,10 @@ static int onboard_hub_power_on(struct onboard_hub *hub) hub->is_powered_on = true; return 0; + +disable_clk: + clk_disable_unprepare(hub->clk); + return err; } static int onboard_hub_power_off(struct onboard_hub *hub) diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 35770e608c64..2d30fc1be306 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -518,8 +518,10 @@ static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id, int ret; client = kzalloc(sizeof *client, GFP_KERNEL); - if (!client) + if (!client) { + kfree(data); return -ENOMEM; + } client->type = type; client->id = id; @@ -535,8 +537,10 @@ static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id, auxdev->dev.release = ljca_auxdev_release; ret = auxiliary_device_init(auxdev); - if (ret) + if (ret) { + kfree(data); goto err_free; + } ljca_auxdev_acpi_bind(adap, auxdev, adr, id); @@ -590,12 +594,8 @@ static int ljca_enumerate_gpio(struct ljca_adapter *adap) valid_pin[i] = get_unaligned_le32(&desc->bank_desc[i].valid_pins); bitmap_from_arr32(gpio_info->valid_pin_map, valid_pin, gpio_num); - ret = ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio", + return ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio", gpio_info, LJCA_GPIO_ACPI_ADR); - if (ret) - kfree(gpio_info); - - return ret; } static int ljca_enumerate_i2c(struct ljca_adapter *adap) @@ -629,10 +629,8 @@ static int ljca_enumerate_i2c(struct ljca_adapter *adap) ret = ljca_new_client_device(adap, LJCA_CLIENT_I2C, i, "ljca-i2c", i2c_info, LJCA_I2C1_ACPI_ADR + i); - if (ret) { - kfree(i2c_info); + if (ret) return ret; - } } return 0; @@ -669,10 +667,8 @@ static int ljca_enumerate_spi(struct ljca_adapter *adap) ret = ljca_new_client_device(adap, LJCA_CLIENT_SPI, i, "ljca-spi", spi_info, LJCA_SPI1_ACPI_ADR + i); - if (ret) { - kfree(spi_info); + if (ret) return ret; - } } return 0; diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 8f735a86cd19..fdcffebf415c 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -262,13 +262,6 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), "could not get vbus regulator\n"); - nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); - if (PTR_ERR(nop->vbus_draw) == -ENODEV) - nop->vbus_draw = NULL; - if (IS_ERR(nop->vbus_draw)) - return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), - "could not get vbus regulator\n"); - nop->dev = dev; nop->phy.dev = nop->dev; nop->phy.label = "nop-xceiv"; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 55a65d941ccb..8a5846d4adf6 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -255,6 +255,10 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_EM060K_128 0x0128 +#define QUECTEL_PRODUCT_EM060K_129 0x0129 +#define QUECTEL_PRODUCT_EM060K_12a 0x012a +#define QUECTEL_PRODUCT_EM060K_12b 0x012b +#define QUECTEL_PRODUCT_EM060K_12c 0x012c #define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 @@ -1218,6 +1222,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) }, @@ -1360,6 +1376,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), @@ -2052,6 +2074,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff), .driver_info = RSVD(4) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b05), /* Longsung U8300 */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b3c), /* Longsung U9300 */ + .driver_info = RSVD(0) | RSVD(4) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, { USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) }, @@ -2272,15 +2298,29 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a04, 0xff) }, /* Fibocom FM650-CN (ECM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ + { USB_DEVICE(0x33f8, 0x0104), /* Rolling RW101-GL (laptop RMNET) */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a2, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a3, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a4, 0xff), /* Rolling RW101-GL (laptop MBIM) */ + .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 71ace274761f..08953f0d4532 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp, * daft to me. */ -static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) +static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) { struct uas_dev_info *devinfo = cmnd->device->hostdata; struct urb *urb; @@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) urb = uas_alloc_sense_urb(devinfo, gfp, cmnd); if (!urb) - return NULL; + return -ENOMEM; usb_anchor_urb(urb, &devinfo->sense_urbs); err = usb_submit_urb(urb, gfp); if (err) { usb_unanchor_urb(urb); uas_log_cmd_state(cmnd, "sense submit err", err); usb_free_urb(urb); - return NULL; } - return urb; + return err; } static int uas_submit_urbs(struct scsi_cmnd *cmnd, struct uas_dev_info *devinfo) { struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); - struct urb *urb; int err; lockdep_assert_held(&devinfo->lock); if (cmdinfo->state & SUBMIT_STATUS_URB) { - urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC); - if (!urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + err = uas_submit_sense_urb(cmnd, GFP_ATOMIC); + if (err) + return err; cmdinfo->state &= ~SUBMIT_STATUS_URB; } @@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_FROM_DEVICE); if (!cmdinfo->data_in_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_IN_URB; } @@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_in_urb); uas_log_cmd_state(cmnd, "data in submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_IN_URB; cmdinfo->state |= DATA_IN_URB_INFLIGHT; @@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_TO_DEVICE); if (!cmdinfo->data_out_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_OUT_URB; } @@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_out_urb); uas_log_cmd_state(cmnd, "data out submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_OUT_URB; cmdinfo->state |= DATA_OUT_URB_INFLIGHT; @@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (cmdinfo->state & ALLOC_CMD_URB) { cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd); if (!cmdinfo->cmd_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_CMD_URB; } @@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->cmd_urb); uas_log_cmd_state(cmnd, "cmd submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->cmd_urb = NULL; cmdinfo->state &= ~SUBMIT_CMD_URB; @@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd) * of queueing, no matter how fatal the error */ if (err == -ENODEV) { - set_host_byte(cmnd, DID_ERROR); + set_host_byte(cmnd, DID_NO_CONNECT); scsi_done(cmnd); goto zombie; } diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 389c7f0b8d93..9610e647a8d4 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1310,6 +1310,7 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, { struct typec_port *port = to_typec_port(dev); struct usb_power_delivery *pd; + int ret; if (!port->ops || !port->ops->pd_set) return -EOPNOTSUPP; @@ -1318,7 +1319,11 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, if (!pd) return -EINVAL; - return port->ops->pd_set(port, pd); + ret = port->ops->pd_set(port, pd); + if (ret) + return ret; + + return size; } static ssize_t select_usb_power_delivery_show(struct device *dev, diff --git a/drivers/usb/typec/mux/it5205.c b/drivers/usb/typec/mux/it5205.c index 5535932e42cd..4357cc67a867 100644 --- a/drivers/usb/typec/mux/it5205.c +++ b/drivers/usb/typec/mux/it5205.c @@ -22,7 +22,7 @@ #include #define IT5205_REG_CHIP_ID(x) (0x4 + (x)) -#define IT5205FN_CHIP_ID 0x35323035 /* "5205" */ +#define IT5205FN_CHIP_ID 0x35303235 /* "5025" -> "5205" */ /* MUX power down register */ #define IT5205_REG_MUXPDR 0x10 diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index ae2b6c94482d..ab6ed6111ed0 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -6855,14 +6855,14 @@ static int tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd) if (data->sink_desc.pdo[0]) { for (i = 0; i < PDO_MAX_OBJECTS && data->sink_desc.pdo[i]; i++) port->snk_pdo[i] = data->sink_desc.pdo[i]; - port->nr_snk_pdo = i + 1; + port->nr_snk_pdo = i; port->operating_snk_mw = data->operating_snk_mw; } if (data->source_desc.pdo[0]) { for (i = 0; i < PDO_MAX_OBJECTS && data->source_desc.pdo[i]; i++) - port->snk_pdo[i] = data->source_desc.pdo[i]; - port->nr_src_pdo = i + 1; + port->src_pdo[i] = data->source_desc.pdo[i]; + port->nr_src_pdo = i; } switch (port->state) { @@ -6910,7 +6910,9 @@ static int tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd) port->port_source_caps = data->source_cap; port->port_sink_caps = data->sink_cap; + typec_port_set_usb_power_delivery(p, NULL); port->selected_pd = pd; + typec_port_set_usb_power_delivery(p, port->selected_pd); unlock: mutex_unlock(&port->lock); return ret; @@ -6943,9 +6945,7 @@ static void tcpm_port_unregister_pd(struct tcpm_port *port) port->port_source_caps = NULL; for (i = 0; i < port->pd_count; i++) { usb_power_delivery_unregister_capabilities(port->pd_list[i]->sink_cap); - kfree(port->pd_list[i]->sink_cap); usb_power_delivery_unregister_capabilities(port->pd_list[i]->source_cap); - kfree(port->pd_list[i]->source_cap); devm_kfree(port->dev, port->pd_list[i]); port->pd_list[i] = NULL; usb_power_delivery_unregister(port->pds[i]); diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index cf52cb34d285..bd6ae92aa39e 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -151,8 +151,12 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) if (!(cci & UCSI_CCI_COMMAND_COMPLETE)) return -EIO; - if (cci & UCSI_CCI_NOT_SUPPORTED) + if (cci & UCSI_CCI_NOT_SUPPORTED) { + if (ucsi_acknowledge_command(ucsi) < 0) + dev_err(ucsi->dev, + "ACK of unsupported command failed\n"); return -EOPNOTSUPP; + } if (cci & UCSI_CCI_ERROR) { if (cmd == UCSI_GET_ERROR_STATUS) @@ -1133,17 +1137,21 @@ static int ucsi_check_cable(struct ucsi_connector *con) if (ret < 0) return ret; - ret = ucsi_get_cable_identity(con); - if (ret < 0) - return ret; + if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) { + ret = ucsi_get_cable_identity(con); + if (ret < 0) + return ret; + } - ret = ucsi_register_plug(con); - if (ret < 0) - return ret; + if (con->ucsi->cap.features & UCSI_CAP_ALT_MODE_DETAILS) { + ret = ucsi_register_plug(con); + if (ret < 0) + return ret; - ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P); - if (ret < 0) - return ret; + ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P); + if (ret < 0) + return ret; + } return 0; } @@ -1189,8 +1197,10 @@ static void ucsi_handle_connector_change(struct work_struct *work) ucsi_register_partner(con); ucsi_partner_task(con, ucsi_check_connection, 1, HZ); ucsi_partner_task(con, ucsi_check_connector_capability, 1, HZ); - ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ); - ucsi_partner_task(con, ucsi_check_cable, 1, HZ); + if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) + ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ); + if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS) + ucsi_partner_task(con, ucsi_check_cable, 1, HZ); if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == UCSI_CONSTAT_PWR_OPMODE_PD) @@ -1215,11 +1225,11 @@ static void ucsi_handle_connector_change(struct work_struct *work) if (con->status.change & UCSI_CONSTAT_CAM_CHANGE) ucsi_partner_task(con, ucsi_check_altmodes, 1, 0); - clear_bit(EVENT_PENDING, &con->ucsi->flags); - mutex_lock(&ucsi->ppm_lock); + clear_bit(EVENT_PENDING, &con->ucsi->flags); ret = ucsi_acknowledge_connector_change(ucsi); mutex_unlock(&ucsi->ppm_lock); + if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); @@ -1237,7 +1247,7 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num) struct ucsi_connector *con = &ucsi->connector[num - 1]; if (!(ucsi->ntfy & UCSI_ENABLE_NTFY_CONNECTOR_CHANGE)) { - dev_dbg(ucsi->dev, "Bogus connector change event\n"); + dev_dbg(ucsi->dev, "Early connector change event\n"); return; } @@ -1260,13 +1270,47 @@ static int ucsi_reset_connector(struct ucsi_connector *con, bool hard) static int ucsi_reset_ppm(struct ucsi *ucsi) { - u64 command = UCSI_PPM_RESET; + u64 command; unsigned long tmo; u32 cci; int ret; mutex_lock(&ucsi->ppm_lock); + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + if (ret < 0) + goto out; + + /* + * If UCSI_CCI_RESET_COMPLETE is already set we must clear + * the flag before we start another reset. Send a + * UCSI_SET_NOTIFICATION_ENABLE command to achieve this. + * Ignore a timeout and try the reset anyway if this fails. + */ + if (cci & UCSI_CCI_RESET_COMPLETE) { + command = UCSI_SET_NOTIFICATION_ENABLE; + ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, + sizeof(command)); + if (ret < 0) + goto out; + + tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); + do { + ret = ucsi->ops->read(ucsi, UCSI_CCI, + &cci, sizeof(cci)); + if (ret < 0) + goto out; + if (cci & UCSI_CCI_COMMAND_COMPLETE) + break; + if (time_is_before_jiffies(tmo)) + break; + msleep(20); + } while (1); + + WARN_ON(cci & UCSI_CCI_RESET_COMPLETE); + } + + command = UCSI_PPM_RESET; ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, sizeof(command)); if (ret < 0) @@ -1589,8 +1633,10 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con) ucsi_register_partner(con); ucsi_pwr_opmode_change(con); ucsi_port_psy_changed(con); - ucsi_get_partner_identity(con); - ucsi_check_cable(con); + if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) + ucsi_get_partner_identity(con); + if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS) + ucsi_check_cable(con); } /* Only notify USB controller if partner supports USB data */ @@ -1636,6 +1682,7 @@ static int ucsi_init(struct ucsi *ucsi) { struct ucsi_connector *con, *connector; u64 command, ntfy; + u32 cci; int ret; int i; @@ -1688,6 +1735,15 @@ static int ucsi_init(struct ucsi *ucsi) ucsi->connector = connector; ucsi->ntfy = ntfy; + + mutex_lock(&ucsi->ppm_lock); + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + mutex_unlock(&ucsi->ppm_lock); + if (ret) + return ret; + if (UCSI_CCI_CONNECTOR(cci)) + ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci)); + return 0; err_unregister: diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 32daf5f58650..0e7c92eb1b22 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -206,7 +206,7 @@ struct ucsi_capability { #define UCSI_CAP_ATTR_POWER_OTHER BIT(10) #define UCSI_CAP_ATTR_POWER_VBUS BIT(14) u8 num_connectors; - u8 features; + u16 features; #define UCSI_CAP_SET_UOM BIT(0) #define UCSI_CAP_SET_PDM BIT(1) #define UCSI_CAP_ALT_MODE_DETAILS BIT(2) @@ -215,7 +215,8 @@ struct ucsi_capability { #define UCSI_CAP_CABLE_DETAILS BIT(5) #define UCSI_CAP_EXT_SUPPLY_NOTIFICATIONS BIT(6) #define UCSI_CAP_PD_RESET BIT(7) - u16 reserved_1; +#define UCSI_CAP_GET_PD_MESSAGE BIT(8) + u8 reserved_1; u8 num_alt_modes; u8 reserved_2; u16 bc_version; diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 928eacbeb21a..7b3ac133ef86 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -23,10 +23,11 @@ struct ucsi_acpi { void *base; struct completion complete; unsigned long flags; +#define UCSI_ACPI_SUPPRESS_EVENT 0 +#define UCSI_ACPI_COMMAND_PENDING 1 +#define UCSI_ACPI_ACK_PENDING 2 guid_t guid; u64 cmd; - bool dell_quirk_probed; - bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -79,9 +80,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, int ret; if (ack) - set_bit(ACK_PENDING, &ua->flags); + set_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); else - set_bit(COMMAND_PENDING, &ua->flags); + set_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -92,9 +93,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, out_clear_bit: if (ack) - clear_bit(ACK_PENDING, &ua->flags); + clear_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); else - clear_bit(COMMAND_PENDING, &ua->flags); + clear_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); return ret; } @@ -129,51 +130,40 @@ static const struct ucsi_operations ucsi_zenbook_ops = { }; /* - * Some Dell laptops expect that an ACK command with the - * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) - * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. - * If this is not done events are not delivered to OSPM and - * subsequent commands will timeout. + * Some Dell laptops don't like ACK commands with the + * UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE + * bit set. To work around this send a dummy command and bundle the + * UCSI_ACK_CONNECTOR_CHANGE with the UCSI_ACK_COMMAND_COMPLETE + * for the dummy command. */ static int ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - u64 cmd = *(u64 *)val, ack = 0; + u64 cmd = *(u64 *)val; + u64 dummycmd = UCSI_GET_CAPABILITY; int ret; - if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && - cmd & UCSI_ACK_CONNECTOR_CHANGE) - ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; + if (cmd == (UCSI_ACK_CC_CI | UCSI_ACK_CONNECTOR_CHANGE)) { + cmd |= UCSI_ACK_COMMAND_COMPLETE; - ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); - if (ret != 0) - return ret; - if (ack == 0) - return ret; + /* + * The UCSI core thinks it is sending a connector change ack + * and will accept new connector change events. We don't want + * this to happen for the dummy command as its response will + * still report the very event that the core is trying to clear. + */ + set_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &dummycmd, + sizeof(dummycmd)); + clear_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); - if (!ua->dell_quirk_probed) { - ua->dell_quirk_probed = true; - - cmd = UCSI_GET_CAPABILITY; - ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, - sizeof(cmd)); - if (ret == 0) - return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, - &ack, sizeof(ack)); - if (ret != -ETIMEDOUT) + if (ret < 0) return ret; - - ua->dell_quirk_active = true; - dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); - dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); } - if (!ua->dell_quirk_active) - return ret; - - return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd)); } static const struct ucsi_operations ucsi_dell_ops = { @@ -209,13 +199,14 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (ret) return; - if (UCSI_CCI_CONNECTOR(cci)) + if (UCSI_CCI_CONNECTOR(cci) && + !test_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) complete(&ua->complete); if (cci & UCSI_CCI_COMMAND_COMPLETE && - test_bit(COMMAND_PENDING, &ua->flags)) + test_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index 932e7bf69447..ce08eb33e5be 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -255,6 +255,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) static void pmic_glink_ucsi_register(struct work_struct *work) { struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work); + int orientation; + int i; + + for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) { + if (!ucsi->port_orientation[i]) + continue; + orientation = gpiod_get_value(ucsi->port_orientation[i]); + + if (orientation >= 0) { + typec_switch_set(ucsi->port_switch[i], + orientation ? TYPEC_ORIENTATION_REVERSE + : TYPEC_ORIENTATION_NORMAL); + } + } ucsi_register(ucsi->ucsi); } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 045f666b4f12..8995730ce0bf 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2515,7 +2515,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, vq->avail_idx = vhost16_to_cpu(vq, avail_idx); if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { - vq_err(vq, "Guest moved used index from %u to %u", + vq_err(vq, "Guest moved avail index from %u to %u", last_avail_idx, vq->avail_idx); return -EFAULT; } @@ -2799,9 +2799,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) r = vhost_get_avail_idx(vq, &avail_idx); if (unlikely(r)) return false; - vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - return vq->avail_idx == vq->last_avail_idx; + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + if (vq->avail_idx != vq->last_avail_idx) { + /* Since we have updated avail_idx, the following + * call to vhost_get_vq_desc() will read available + * ring entries. Make sure that read happens after + * the avail_idx read. + */ + smp_rmb(); + return false; + } + + return true; } EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); @@ -2838,9 +2848,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) &vq->avail->idx, r); return false; } - vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - return vq->avail_idx != vq->last_avail_idx; + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + if (vq->avail_idx != vq->last_avail_idx) { + /* Since we have updated avail_idx, the following + * call to vhost_get_vq_desc() will read available + * ring entries. Make sure that read happens after + * the avail_idx read. + */ + smp_rmb(); + return true; + } + + return false; } EXPORT_SYMBOL_GPL(vhost_enable_notify); diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index e3179e987cdb..197b6d5268e9 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -494,6 +494,7 @@ config FB_SBUS_HELPERS select FB_CFB_COPYAREA select FB_CFB_FILLRECT select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS config FB_BW2 bool "BWtwo support" @@ -514,6 +515,7 @@ config FB_CG6 depends on (FB = y) && (SPARC && FB_SBUS) select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS help This is the frame buffer device driver for the CGsix (GX, TurboGX) frame buffer. @@ -523,6 +525,7 @@ config FB_FFB depends on FB_SBUS && SPARC64 select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS help This is the frame buffer device driver for the Creator, Creator3D, and Elite3D graphics boards. diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c index b67a28da4702..a1c467a0e9f7 100644 --- a/drivers/virt/vmgenid.c +++ b/drivers/virt/vmgenid.c @@ -68,7 +68,6 @@ out: static void vmgenid_notify(struct acpi_device *device, u32 event) { struct vmgenid_state *state = acpi_driver_data(device); - char *envp[] = { "NEW_VMGENID=1", NULL }; u8 old_id[VMGENID_SIZE]; memcpy(old_id, state->this_id, sizeof(old_id)); @@ -76,7 +75,6 @@ static void vmgenid_notify(struct acpi_device *device, u32 event) if (!memcmp(old_id, state->this_id, sizeof(old_id))) return; add_vmfork_randomness(state->this_id, sizeof(state->this_id)); - kobject_uevent_env(&device->dev.kobj, KOBJ_CHANGE, envp); } static const struct acpi_device_id vmgenid_ids[] = { diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index f173587893cb..9510c551dce8 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -362,14 +362,16 @@ static const struct bus_type virtio_bus = { .remove = virtio_dev_remove, }; -int register_virtio_driver(struct virtio_driver *driver) +int __register_virtio_driver(struct virtio_driver *driver, struct module *owner) { /* Catch this early. */ BUG_ON(driver->feature_table_size && !driver->feature_table); driver->driver.bus = &virtio_bus; + driver->driver.owner = owner; + return driver_register(&driver->driver); } -EXPORT_SYMBOL_GPL(register_virtio_driver); +EXPORT_SYMBOL_GPL(__register_virtio_driver); void unregister_virtio_driver(struct virtio_driver *driver) { diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 29281b7c3887..0d6138bee2a3 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -49,9 +49,6 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry) static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags, unsigned int s_cache, unsigned int f_flags) { - if (fid->qid.type != P9_QTFILE) - return; - if ((!s_cache) || ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) || (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) { diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index abdbbaee5184..348cc90bf9c5 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -520,6 +520,7 @@ const struct file_operations v9fs_file_operations = { .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, + .setlease = simple_nosetlease, }; const struct file_operations v9fs_file_operations_dotl = { @@ -534,4 +535,5 @@ const struct file_operations v9fs_file_operations_dotl = { .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, + .setlease = simple_nosetlease, }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 360a5304ec03..47bd77199e20 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -83,7 +83,7 @@ static int p9mode2perm(struct v9fs_session_info *v9ses, int res; int mode = stat->mode; - res = mode & S_IALLUGO; + res = mode & 0777; /* S_IRWXUGO */ if (v9fs_proto_dotu(v9ses)) { if ((mode & P9_DMSETUID) == P9_DMSETUID) res |= S_ISUID; @@ -178,6 +178,9 @@ int v9fs_uflags2omode(int uflags, int extended) break; } + if (uflags & O_TRUNC) + ret |= P9_OTRUNC; + if (extended) { if (uflags & O_EXCL) ret |= P9_OEXCL; @@ -344,17 +347,21 @@ void v9fs_evict_inode(struct inode *inode) struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode); __le32 __maybe_unused version; - truncate_inode_pages_final(&inode->i_data); + if (!is_bad_inode(inode)) { + truncate_inode_pages_final(&inode->i_data); - version = cpu_to_le32(v9inode->qid.version); - netfs_clear_inode_writeback(inode, &version); + version = cpu_to_le32(v9inode->qid.version); + netfs_clear_inode_writeback(inode, &version); - clear_inode(inode); - filemap_fdatawrite(&inode->i_data); + clear_inode(inode); + filemap_fdatawrite(&inode->i_data); #ifdef CONFIG_9P_FSCACHE - fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false); + if (v9fs_inode_cookie(v9inode)) + fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false); #endif + } else + clear_inode(inode); } struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) @@ -1057,8 +1064,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, struct v9fs_session_info *v9ses = sb->s_fs_info; struct v9fs_inode *v9inode = V9FS_I(inode); - set_nlink(inode, 1); - inode_set_atime(inode, stat->atime, 0); inode_set_mtime(inode, stat->mtime, 0); inode_set_ctime(inode, stat->mtime, 0); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index ef9db3e03506..55dde186041a 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -78,11 +78,11 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) retval = v9fs_init_inode(v9ses, inode, &fid->qid, st->st_mode, new_decode_dev(st->st_rdev)); + v9fs_stat2inode_dotl(st, inode, 0); kfree(st); if (retval) goto error; - v9fs_stat2inode_dotl(st, inode, 0); v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); @@ -297,7 +297,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, umode_t omode) { int err; - struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; kgid_t gid; const unsigned char *name; @@ -307,7 +306,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); - v9ses = v9fs_inode2v9ses(dir); omode |= S_IFDIR; if (dir->i_mode & S_ISGID) @@ -739,7 +737,6 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, kgid_t gid; const unsigned char *name; umode_t mode; - struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; struct p9_qid qid; @@ -749,7 +746,6 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); - v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 4236058c7bbd..55e67e36ae68 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -244,6 +244,21 @@ done: return res; } +static int v9fs_drop_inode(struct inode *inode) +{ + struct v9fs_session_info *v9ses; + + v9ses = v9fs_inode2v9ses(inode); + if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) + return generic_drop_inode(inode); + /* + * in case of non cached mode always drop the + * inode because we want the inode attribute + * to always match that on the server. + */ + return 1; +} + static int v9fs_write_inode(struct inode *inode, struct writeback_control *wbc) { @@ -268,6 +283,7 @@ static const struct super_operations v9fs_super_ops = { .alloc_inode = v9fs_alloc_inode, .free_inode = v9fs_free_inode, .statfs = simple_statfs, + .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, .show_options = v9fs_show_options, .umount_begin = v9fs_umount_begin, @@ -278,6 +294,7 @@ static const struct super_operations v9fs_super_ops_dotl = { .alloc_inode = v9fs_alloc_inode, .free_inode = v9fs_free_inode, .statfs = v9fs_statfs, + .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, .show_options = v9fs_show_options, .umount_begin = v9fs_umount_begin, diff --git a/fs/aio.c b/fs/aio.c index 9cdaa2faa536..0f4f531c9780 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1202,8 +1202,8 @@ static void aio_complete(struct aio_kiocb *iocb) spin_lock_irqsave(&ctx->wait.lock, flags); list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry) if (avail >= curr->min_nr) { - list_del_init_careful(&curr->w.entry); wake_up_process(curr->w.private); + list_del_init_careful(&curr->w.entry); } spin_unlock_irqrestore(&ctx->wait.lock, flags); } diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index b02796c8a595..66ca0bbee639 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -17,6 +17,7 @@ bcachefs-y := \ btree_journal_iter.o \ btree_key_cache.o \ btree_locking.o \ + btree_node_scan.o \ btree_trans_commit.o \ btree_update.o \ btree_update_interior.o \ @@ -37,6 +38,7 @@ bcachefs-y := \ error.o \ extents.o \ extent_update.o \ + eytzinger.o \ fs.o \ fs-common.o \ fs-ioctl.o \ @@ -67,6 +69,7 @@ bcachefs-y := \ quota.o \ rebalance.o \ recovery.o \ + recovery_passes.o \ reflink.o \ replicas.o \ sb-clean.o \ diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 3640f417cce1..5c180fdc3efb 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -281,7 +281,6 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; - struct bkey_s_c_xattr xattr; struct posix_acl *acl = NULL; struct bkey_s_c k; int ret; @@ -290,29 +289,28 @@ retry: ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash, inode_inum(inode), &search, 0); - if (ret) { - if (!bch2_err_matches(ret, ENOENT)) - acl = ERR_PTR(ret); - goto out; - } + if (ret) + goto err; k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); - if (ret) { - acl = ERR_PTR(ret); - goto out; - } + if (ret) + goto err; - xattr = bkey_s_c_to_xattr(k); + struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); acl = bch2_acl_from_disk(trans, xattr_val(xattr.v), - le16_to_cpu(xattr.v->x_val_len)); - - if (!IS_ERR(acl)) - set_cached_acl(&inode->v, type, acl); -out: - if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart)) + le16_to_cpu(xattr.v->x_val_len)); + ret = PTR_ERR_OR_ZERO(acl); +err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; + if (ret) + acl = !bch2_err_matches(ret, ENOENT) ? ERR_PTR(ret) : NULL; + + if (!IS_ERR_OR_NULL(acl)) + set_cached_acl(&inode->v, type, acl); + bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); return acl; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 893e38f9db80..4ff56fa4d539 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1713,34 +1713,37 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, if (ret) goto out; - if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { - a->v.gen++; - SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); - goto write; - } - - if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { - if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { - bch2_trans_inconsistent(trans, - "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" - "%s", - a->v.journal_seq, - c->journal.flushed_seq_ondisk, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + if (a->v.dirty_sectors) { + if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, + trans, "attempting to discard bucket with dirty data\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret = -EIO; - } goto out; } if (a->v.data_type != BCH_DATA_need_discard) { - if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { - bch2_trans_inconsistent(trans, - "bucket incorrectly set in need_discard btree\n" - "%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - ret = -EIO; + if (data_type_is_empty(a->v.data_type) && + BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { + a->v.gen++; + SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); + goto write; } + if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, + trans, "bucket incorrectly set in need_discard btree\n" + "%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret = -EIO; + goto out; + } + + if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { + if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, + trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s", + a->v.journal_seq, + c->journal.flushed_seq_ondisk, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret = -EIO; goto out; } @@ -1835,6 +1838,7 @@ static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpo if (ret) goto err; + BUG_ON(a->v.dirty_sectors); SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); a->v.data_type = alloc_data_type(a->v, a->v.data_type); @@ -1942,6 +1946,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, goto out; BUG_ON(a->v.data_type != BCH_DATA_cached); + BUG_ON(a->v.dirty_sectors); if (!a->v.cached_sectors) bch_err(c, "invalidating empty bucket, confused"); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 214b15c84d1f..a1fc30adf912 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca) static inline unsigned open_buckets_reserved(enum bch_watermark watermark) { switch (watermark) { - case BCH_WATERMARK_reclaim: + case BCH_WATERMARK_interior_updates: return 0; + case BCH_WATERMARK_reclaim: + return OPEN_BUCKETS_COUNT / 6; case BCH_WATERMARK_btree: case BCH_WATERMARK_btree_copygc: return OPEN_BUCKETS_COUNT / 4; diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index b91b7a461056..c2226e947c41 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -22,7 +22,8 @@ struct bucket_alloc_state { x(copygc) \ x(btree) \ x(btree_copygc) \ - x(reclaim) + x(reclaim) \ + x(interior_updates) enum bch_watermark { #define x(name) BCH_WATERMARK_##name, diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 8cb35ea572cb..fadb1078903d 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -8,6 +8,7 @@ #include "btree_update.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" +#include "checksum.h" #include "error.h" #include @@ -29,8 +30,7 @@ static bool extent_matches_bp(struct bch_fs *c, if (p.ptr.cached) continue; - bch2_extent_ptr_to_bp(c, btree_id, level, k, p, - &bucket2, &bp2); + bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2); if (bpos_eq(bucket, bucket2) && !memcmp(&bp, &bp2, sizeof(bp))) return true; @@ -44,13 +44,20 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); + + /* these will be caught by fsck */ + if (!bch2_dev_exists2(c, bp.k->p.inode)) + return 0; + + struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode); struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); int ret = 0; - bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), + bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || + !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), c, err, - backpointer_pos_wrong, - "backpointer at wrong pos"); + backpointer_bucket_offset_wrong, + "backpointer bucket_offset wrong"); fsck_err: return ret; } @@ -378,7 +385,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ backpointer_to_missing_alloc, "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", alloc_iter.pos.inode, alloc_iter.pos.offset, - (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, bp_iter, 0); goto out; } @@ -414,6 +421,84 @@ struct extents_to_bp_state { struct bkey_buf last_flushed; }; +static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, + struct bkey_s_c extent, unsigned dev) +{ + struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + bch2_bkey_drop_device(bkey_i_to_s(n), dev); + return bch2_btree_insert_trans(trans, btree, n, 0); +} + +static int check_extent_checksum(struct btree_trans *trans, + enum btree_id btree, struct bkey_s_c extent, + enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct printbuf buf = PRINTBUF; + void *data_buf = NULL; + struct bio *bio = NULL; + size_t bytes; + int ret = 0; + + if (bkey_is_btree_ptr(extent.k)) + return false; + + bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) + if (p.ptr.dev == dev) + goto found; + BUG(); +found: + if (!p.crc.csum_type) + return false; + + bytes = p.crc.compressed_size << 9; + + struct bch_dev *ca = bch_dev_bkey_exists(c, dev); + if (!bch2_dev_get_ioref(ca, READ)) + return false; + + data_buf = kvmalloc(bytes, GFP_KERNEL); + if (!data_buf) { + ret = -ENOMEM; + goto err; + } + + bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL); + bio->bi_iter.bi_sector = p.ptr.offset; + bch2_bio_map(bio, data_buf, bytes); + ret = submit_bio_wait(bio); + if (ret) + goto err; + + prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); + prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); + bch2_bkey_val_to_text(&buf, c, extent); + prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); + bch2_bkey_val_to_text(&buf, c, extent2); + + struct nonce nonce = extent_nonce(extent.k->version, p.crc); + struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); + if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), + c, dup_backpointer_to_bad_csum_extent, + "%s", buf.buf)) + ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; +fsck_err: +err: + if (bio) + bio_put(bio); + kvfree(data_buf); + percpu_ref_put(&ca->io_ref); + printbuf_exit(&buf); + return ret; +} + static int check_bp_exists(struct btree_trans *trans, struct extents_to_bp_state *s, struct bpos bucket, @@ -421,7 +506,8 @@ static int check_bp_exists(struct btree_trans *trans, struct bkey_s_c orig_k) { struct bch_fs *c = trans->c; - struct btree_iter bp_iter = { NULL }; + struct btree_iter bp_iter = {}; + struct btree_iter other_extent_iter = {}; struct printbuf buf = PRINTBUF; struct bkey_s_c bp_k; struct bkey_buf tmp; @@ -429,13 +515,19 @@ static int check_bp_exists(struct btree_trans *trans, bch2_bkey_buf_init(&tmp); + if (!bch2_dev_bucket_exists(c, bucket)) { + prt_str(&buf, "extent for nonexistent device:bucket "); + bch2_bpos_to_text(&buf, bucket); + prt_str(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, orig_k); + bch_err(c, "%s", buf.buf); + return -BCH_ERR_fsck_repair_unimplemented; + } + if (bpos_lt(bucket, s->bucket_start) || bpos_gt(bucket, s->bucket_end)) return 0; - if (!bch2_dev_bucket_exists(c, bucket)) - goto missing; - bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bucket_pos_to_bp(c, bucket, bp.bucket_offset), 0); @@ -461,21 +553,94 @@ static int check_bp_exists(struct btree_trans *trans, ret = -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } - goto missing; + + goto check_existing_bp; } out: err: fsck_err: + bch2_trans_iter_exit(trans, &other_extent_iter); bch2_trans_iter_exit(trans, &bp_iter); bch2_bkey_buf_exit(&tmp, c); printbuf_exit(&buf); return ret; +check_existing_bp: + /* Do we have a backpointer for a different extent? */ + if (bp_k.k->type != KEY_TYPE_backpointer) + goto missing; + + struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; + + struct bkey_s_c other_extent = + bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); + ret = bkey_err(other_extent); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + ret = 0; + if (ret) + goto err; + + if (!other_extent.k) + goto missing; + + if (bch2_extents_match(orig_k, other_extent)) { + printbuf_reset(&buf); + prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); + bch2_bkey_val_to_text(&buf, c, orig_k); + prt_str(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, other_extent); + bch_err(c, "%s", buf.buf); + + if (other_extent.k->size <= orig_k.k->size) { + ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); + if (ret) + goto err; + goto out; + } else { + ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); + if (ret) + goto err; + goto missing; + } + } + + ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); + if (ret < 0) + goto err; + if (ret) { + ret = 0; + goto missing; + } + + ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); + if (ret < 0) + goto err; + if (ret) { + ret = 0; + goto out; + } + + printbuf_reset(&buf); + prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); + bch2_bkey_val_to_text(&buf, c, orig_k); + prt_str(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, other_extent); + bch_err(c, "%s", buf.buf); + ret = -BCH_ERR_fsck_repair_unimplemented; + goto err; missing: + printbuf_reset(&buf); prt_printf(&buf, "missing backpointer for btree=%s l=%u ", bch2_btree_id_str(bp.btree_id), bp.level); bch2_bkey_val_to_text(&buf, c, orig_k); - prt_printf(&buf, "\nbp pos "); - bch2_bpos_to_text(&buf, bp_iter.pos); + prt_printf(&buf, "\n got: "); + bch2_bkey_val_to_text(&buf, c, bp_k); + + struct bkey_i_backpointer n_bp_k; + bkey_backpointer_init(&n_bp_k.k_i); + n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset); + n_bp_k.v = bp; + prt_printf(&buf, "\n want: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); @@ -502,8 +667,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, if (p.ptr.cached) continue; - bch2_extent_ptr_to_bp(c, btree, level, - k, p, &bucket_pos, &bp); + bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp); ret = check_bp_exists(trans, s, bucket_pos, bp, k); if (ret) diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 327365a9feac..85949b9fd880 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -53,14 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c, u64 bucket_offset) { struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); - struct bpos ret; - - ret = POS(bucket.inode, - (bucket_to_sector(ca, bucket.offset) << - MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); + struct bpos ret = POS(bucket.inode, + (bucket_to_sector(ca, bucket.offset) << + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret))); - return ret; } @@ -90,20 +87,40 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i); } -static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p) +static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, + struct extent_ptr_decoded p, + const union bch_extent_entry *entry) { - return level ? BCH_DATA_btree : - p.has_ec ? BCH_DATA_stripe : - BCH_DATA_user; + switch (k.k->type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_btree_ptr_v2: + return BCH_DATA_btree; + case KEY_TYPE_extent: + case KEY_TYPE_reflink_v: + return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user; + case KEY_TYPE_stripe: { + const struct bch_extent_ptr *ptr = &entry->ptr; + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + + BUG_ON(ptr < s.v->ptrs || + ptr >= s.v->ptrs + s.v->nr_blocks); + + return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant + ? BCH_DATA_parity + : BCH_DATA_user; + } + default: + BUG(); + } } static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, enum btree_id btree_id, unsigned level, struct bkey_s_c k, struct extent_ptr_decoded p, + const union bch_extent_entry *entry, struct bpos *bucket_pos, struct bch_backpointer *bp) { - enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); s64 sectors = level ? btree_sectors(c) : k.k->size; u32 bucket_offset; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 799aa32b6b4d..91c3c1fef233 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -209,7 +209,7 @@ #include "fifo.h" #include "nocow_locking_types.h" #include "opts.h" -#include "recovery_types.h" +#include "recovery_passes_types.h" #include "sb-errors_types.h" #include "seqmutex.h" #include "time_stats.h" @@ -456,6 +456,7 @@ enum bch_time_stats { #include "alloc_types.h" #include "btree_types.h" +#include "btree_node_scan_types.h" #include "btree_write_buffer_types.h" #include "buckets_types.h" #include "buckets_waiting_for_journal_types.h" @@ -614,6 +615,7 @@ struct bch_dev { */ #define BCH_FS_FLAGS() \ + x(new_fs) \ x(started) \ x(may_go_rw) \ x(rw) \ @@ -707,6 +709,8 @@ struct btree_trans_buf { x(stripe_delete) \ x(reflink) \ x(fallocate) \ + x(fsync) \ + x(dio_write) \ x(discard) \ x(discard_fast) \ x(invalidate) \ @@ -796,6 +800,7 @@ struct bch_fs { u64 features; u64 compat; unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)]; + u64 btrees_lost_data; } sb; @@ -810,7 +815,6 @@ struct bch_fs { /* snapshot.c: */ struct snapshot_table __rcu *snapshots; - size_t snapshot_table_size; struct mutex snapshot_table_lock; struct rw_semaphore snapshot_create_lock; @@ -1104,6 +1108,8 @@ struct bch_fs { struct journal_keys journal_keys; struct list_head journal_iters; + struct find_btree_nodes found_btree_nodes; + u64 last_bucket_seq_cleanup; u64 counters_on_mount[BCH_COUNTER_NR]; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index bff8750ac0d7..085987435a5e 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -578,7 +578,8 @@ struct bch_member { __le64 nbuckets; /* device size */ __le16 first_bucket; /* index of first bucket used */ __le16 bucket_size; /* sectors */ - __le32 pad; + __u8 btree_bitmap_shift; + __u8 pad[3]; __le64 last_mount; /* time_t */ __le64 flags; @@ -587,6 +588,7 @@ struct bch_member { __le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; __le64 errors_reset_time; __le64 seq; + __le64 btree_allocated_bitmap; }; #define BCH_MEMBER_V1_BYTES 56 @@ -818,6 +820,7 @@ struct bch_sb_field_ext { struct bch_sb_field field; __le64 recovery_passes_required[2]; __le64 errors_silent[8]; + __le64 btrees_lost_data; }; struct bch_sb_field_downgrade_entry { @@ -875,7 +878,8 @@ struct bch_sb_field_downgrade { x(rebalance_work, BCH_VERSION(1, 3)) \ x(member_seq, BCH_VERSION(1, 4)) \ x(subvolume_fs_parent, BCH_VERSION(1, 5)) \ - x(btree_subvolume_children, BCH_VERSION(1, 6)) + x(btree_subvolume_children, BCH_VERSION(1, 6)) \ + x(mi_btree_bitmap, BCH_VERSION(1, 7)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1313,7 +1317,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb) x(write_buffer_keys, 11) \ x(datetime, 12) -enum { +enum bch_jset_entry_type { #define x(f, nr) BCH_JSET_ENTRY_##f = nr, BCH_JSET_ENTRY_TYPES() #undef x @@ -1359,7 +1363,7 @@ struct jset_entry_blacklist_v2 { x(inodes, 1) \ x(key_version, 2) -enum { +enum bch_fs_usage_type { #define x(f, nr) BCH_FS_USAGE_##f = nr, BCH_FS_USAGE_TYPES() #undef x @@ -1534,6 +1538,20 @@ enum btree_id { BTREE_ID_NR }; +static inline bool btree_id_is_alloc(enum btree_id id) +{ + switch (id) { + case BTREE_ID_alloc: + case BTREE_ID_backpointers: + case BTREE_ID_need_discard: + case BTREE_ID_freespace: + case BTREE_ID_bucket_gens: + return true; + default: + return false; + } +} + #define BTREE_MAX_DEPTH 4U /* Btree nodes */ diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index cf23ff47bed8..3a45d128f608 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -314,6 +314,12 @@ static inline unsigned bkeyp_key_u64s(const struct bkey_format *format, return bkey_packed(k) ? format->key_u64s : BKEY_U64s; } +static inline bool bkeyp_u64s_valid(const struct bkey_format *f, + const struct bkey_packed *k) +{ + return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s); +} + static inline unsigned bkeyp_key_bytes(const struct bkey_format *format, const struct bkey_packed *k) { diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5e52684764eb..db336a43fc08 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -171,11 +171,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, if (type >= BKEY_TYPE_NR) return 0; - bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && + bkey_fsck_err_on((type == BKEY_TYPE_btree || + (flags & BKEY_INVALID_COMMIT)) && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", - bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]); + bch2_btree_node_type_str(type), + k.k->type < KEY_TYPE_MAX + ? bch2_bkey_types[k.k->type] + : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { bkey_fsck_err_on(k.k->size == 0, c, err, diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 3fd1085b6c61..3bb477840eab 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -134,18 +134,24 @@ void bch2_dump_btree_node_iter(struct btree *b, printbuf_exit(&buf); } -#ifdef CONFIG_BCACHEFS_DEBUG - -void __bch2_verify_btree_nr_keys(struct btree *b) +struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b) { struct bset_tree *t; struct bkey_packed *k; - struct btree_nr_keys nr = { 0 }; + struct btree_nr_keys nr = {}; for_each_bset(b, t) bset_tree_for_each_key(b, t, k) if (!bkey_deleted(k)) btree_keys_account_key_add(&nr, t - b->set, k); + return nr; +} + +#ifdef CONFIG_BCACHEFS_DEBUG + +void __bch2_verify_btree_nr_keys(struct btree *b) +{ + struct btree_nr_keys nr = bch2_btree_node_count_keys(b); BUG_ON(memcmp(&nr, &b->nr, sizeof(nr))); } diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h index 79c77baaa383..120a79fd456b 100644 --- a/fs/bcachefs/bset.h +++ b/fs/bcachefs/bset.h @@ -458,6 +458,8 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, /* Accounting: */ +struct btree_nr_keys bch2_btree_node_count_keys(struct btree *); + static inline void btree_keys_account_key(struct btree_nr_keys *n, unsigned bset, struct bkey_packed *k, diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 562561a9a510..02c70e813fac 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -709,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; - u32 seq; - BUG_ON(level + 1 >= BTREE_MAX_DEPTH); + if (unlikely(level >= BTREE_MAX_DEPTH)) { + int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u", + level, BTREE_MAX_DEPTH); + return ERR_PTR(ret); + } + + if (unlikely(!bkey_is_btree_ptr(&k->k))) { + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + + int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf); + printbuf_exit(&buf); + return ERR_PTR(ret); + } + + if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) { + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + + int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf); + printbuf_exit(&buf); + return ERR_PTR(ret); + } + /* * Parent node must be locked, else we could read in a btree node that's * been freed: @@ -752,34 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, } set_btree_node_read_in_flight(b); - six_unlock_write(&b->c.lock); - seq = six_lock_seq(&b->c.lock); - six_unlock_intent(&b->c.lock); - - /* Unlock before doing IO: */ - if (path && sync) - bch2_trans_unlock_noassert(trans); - - bch2_btree_node_read(trans, b, sync); - - if (!sync) - return NULL; if (path) { - int ret = bch2_trans_relock(trans) ?: - bch2_btree_path_relock_intent(trans, path); - if (ret) { - BUG_ON(!trans->restarted); - return ERR_PTR(ret); - } - } + u32 seq = six_lock_seq(&b->c.lock); - if (!six_relock_type(&b->c.lock, lock_type, seq)) { - BUG_ON(!path); + /* Unlock before doing IO: */ + six_unlock_intent(&b->c.lock); + bch2_trans_unlock_noassert(trans); - trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path); - return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill)); + bch2_btree_node_read(trans, b, sync); + + if (!sync) + return NULL; + + if (!six_relock_type(&b->c.lock, lock_type, seq)) + b = NULL; + } else { + bch2_btree_node_read(trans, b, sync); + if (lock_type == SIX_LOCK_read) + six_lock_downgrade(&b->c.lock); } return b; @@ -808,7 +822,8 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) prt_printf(&buf, "\nmax "); bch2_bpos_to_text(&buf, b->data->max_key); - bch2_fs_inconsistent(c, "%s", buf.buf); + bch2_fs_topology_error(c, "%s", buf.buf); + printbuf_exit(&buf); } @@ -1111,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; - struct btree *b; BUG_ON(path && !btree_node_locked(path, level + 1)); BUG_ON(level >= BTREE_MAX_DEPTH); - b = btree_cache_find(bc, k); + struct btree *b = btree_cache_find(bc, k); if (b) return 0; b = bch2_btree_node_fill(trans, path, k, btree_id, level, SIX_LOCK_read, false); - return PTR_ERR_OR_ZERO(b); + if (!IS_ERR_OR_NULL(b)) + six_unlock_read(&b->c.lock); + return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b); } void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) @@ -1134,6 +1150,8 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) b = btree_cache_find(bc, k); if (!b) return; + + BUG_ON(b == btree_node_root(trans->c, b)); wait_on_io: /* not allowed to wait on io with btree locks held: */ @@ -1145,6 +1163,8 @@ wait_on_io: btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); + if (unlikely(b->hash_val != btree_ptr_hash_val(k))) + goto out; if (btree_node_dirty(b)) { __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); @@ -1159,7 +1179,7 @@ wait_on_io: btree_node_data_free(c, b); bch2_btree_node_hash_remove(bc, b); mutex_unlock(&bc->lock); - +out: six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index bdaed29f084a..ecbd9598f69f 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -7,11 +7,13 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "backpointers.h" #include "bkey_methods.h" #include "bkey_buf.h" #include "btree_journal_iter.h" #include "btree_key_cache.h" #include "btree_locking.h" +#include "btree_node_scan.h" #include "btree_update_interior.h" #include "btree_io.h" #include "btree_gc.h" @@ -24,7 +26,7 @@ #include "journal.h" #include "keylist.h" #include "move.h" -#include "recovery.h" +#include "recovery_passes.h" #include "reflink.h" #include "replicas.h" #include "super-io.h" @@ -40,6 +42,7 @@ #define DROP_THIS_NODE 10 #define DROP_PREV_NODE 11 +#define DID_FILL_FROM_SCAN 12 static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k) { @@ -70,90 +73,6 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) __gc_pos_set(c, new_pos); } -/* - * Missing: if an interior btree node is empty, we need to do something - - * perhaps just kill it - */ -static int bch2_gc_check_topology(struct bch_fs *c, - struct btree *b, - struct bkey_buf *prev, - struct bkey_buf cur, - bool is_last) -{ - struct bpos node_start = b->data->min_key; - struct bpos node_end = b->data->max_key; - struct bpos expected_start = bkey_deleted(&prev->k->k) - ? node_start - : bpos_successor(prev->k->k.p); - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; - int ret = 0; - - if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) { - struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k); - - if (!bpos_eq(expected_start, bp->v.min_key)) { - bch2_topology_error(c); - - if (bkey_deleted(&prev->k->k)) { - prt_printf(&buf1, "start of node: "); - bch2_bpos_to_text(&buf1, node_start); - } else { - bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k)); - } - bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k)); - - if (__fsck_err(c, - FSCK_CAN_FIX| - FSCK_CAN_IGNORE| - FSCK_NO_RATELIMIT, - btree_node_topology_bad_min_key, - "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " cur %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { - bch_info(c, "Halting mark and sweep to start topology repair pass"); - ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); - goto err; - } else { - set_bit(BCH_FS_initial_gc_unfixed, &c->flags); - } - } - } - - if (is_last && !bpos_eq(cur.k->k.p, node_end)) { - bch2_topology_error(c); - - printbuf_reset(&buf1); - printbuf_reset(&buf2); - - bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k)); - bch2_bpos_to_text(&buf2, node_end); - - if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT, - btree_node_topology_bad_max_key, - "btree node with incorrect max_key at btree %s level %u:\n" - " %s\n" - " expected %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf) && - should_restart_for_topology_repair(c)) { - bch_info(c, "Halting mark and sweep to start topology repair pass"); - ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); - goto err; - } else { - set_bit(BCH_FS_initial_gc_unfixed, &c->flags); - } - } - - bch2_bkey_buf_copy(prev, c, cur.k); -err: -fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); - return ret; -} - static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst) { switch (b->key.k.type) { @@ -212,6 +131,17 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) struct bkey_i_btree_ptr_v2 *new; int ret; + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, " -> "); + bch2_bpos_to_text(&buf, new_min); + + bch_info(c, "%s(): %s", __func__, buf.buf); + printbuf_exit(&buf); + } + new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); if (!new) return -BCH_ERR_ENOMEM_gc_repair_key; @@ -237,6 +167,17 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) struct bkey_i_btree_ptr_v2 *new; int ret; + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, " -> "); + bch2_bpos_to_text(&buf, new_max); + + bch_info(c, "%s(): %s", __func__, buf.buf); + printbuf_exit(&buf); + } + ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p); if (ret) return ret; @@ -268,127 +209,138 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) return 0; } -static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, - struct btree *prev, struct btree *cur) +static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b, + struct btree *prev, struct btree *cur, + struct bpos *pulled_from_scan) { struct bpos expected_start = !prev ? b->data->min_key : bpos_successor(prev->key.k.p); - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; + struct printbuf buf = PRINTBUF; int ret = 0; - if (!prev) { - prt_printf(&buf1, "start of node: "); - bch2_bpos_to_text(&buf1, b->data->min_key); - } else { - bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key)); + BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && + !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, + b->data->min_key)); + + if (bpos_eq(expected_start, cur->data->min_key)) + return 0; + + prt_printf(&buf, " at btree %s level %u:\n parent: ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + if (prev) { + prt_printf(&buf, "\n prev: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key)); } - bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key)); + prt_str(&buf, "\n next: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key)); - if (prev && - bpos_gt(expected_start, cur->data->min_key) && - BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { - /* cur overwrites prev: */ + if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */ + if (b->c.level == 1 && + bpos_lt(*pulled_from_scan, cur->data->min_key)) { + ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, + expected_start, + bpos_predecessor(cur->data->min_key)); + if (ret) + goto err; - if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key, - cur->data->min_key), c, - btree_node_topology_overwritten_by_next_node, - "btree node overwritten by next node at btree %s level %u:\n" - " node %s\n" - " next %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) { - ret = DROP_PREV_NODE; - goto out; + *pulled_from_scan = cur->data->min_key; + ret = DID_FILL_FROM_SCAN; + } else { + if (mustfix_fsck_err(c, btree_node_topology_bad_min_key, + "btree node with incorrect min_key%s", buf.buf)) + ret = set_node_min(c, cur, expected_start); } - - if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p, - bpos_predecessor(cur->data->min_key)), c, - btree_node_topology_bad_max_key, - "btree node with incorrect max_key at btree %s level %u:\n" - " node %s\n" - " next %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) - ret = set_node_max(c, prev, - bpos_predecessor(cur->data->min_key)); - } else { - /* prev overwrites cur: */ - - if (mustfix_fsck_err_on(bpos_ge(expected_start, - cur->data->max_key), c, - btree_node_topology_overwritten_by_prev_node, - "btree node overwritten by prev node at btree %s level %u:\n" - " prev %s\n" - " node %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) { - ret = DROP_THIS_NODE; - goto out; + } else { /* overlap */ + if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { /* cur overwrites prev */ + if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */ + if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node, + "btree node overwritten by next node%s", buf.buf)) + ret = DROP_PREV_NODE; + } else { + if (mustfix_fsck_err(c, btree_node_topology_bad_max_key, + "btree node with incorrect max_key%s", buf.buf)) + ret = set_node_max(c, prev, + bpos_predecessor(cur->data->min_key)); + } + } else { + if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? */ + if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node, + "btree node overwritten by prev node%s", buf.buf)) + ret = DROP_THIS_NODE; + } else { + if (mustfix_fsck_err(c, btree_node_topology_bad_min_key, + "btree node with incorrect min_key%s", buf.buf)) + ret = set_node_min(c, cur, expected_start); + } } - - if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c, - btree_node_topology_bad_min_key, - "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " node %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) - ret = set_node_min(c, cur, expected_start); } -out: +err: fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); + printbuf_exit(&buf); return ret; } static int btree_repair_node_end(struct bch_fs *c, struct btree *b, - struct btree *child) + struct btree *child, struct bpos *pulled_from_scan) { - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; + struct printbuf buf = PRINTBUF; int ret = 0; - bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key)); - bch2_bpos_to_text(&buf2, b->key.k.p); + if (bpos_eq(child->key.k.p, b->key.k.p)) + return 0; - if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c, - btree_node_topology_bad_max_key, - "btree node with incorrect max_key at btree %s level %u:\n" - " %s\n" - " expected %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) { - ret = set_node_max(c, child, b->key.k.p); - if (ret) - goto err; + prt_printf(&buf, "at btree %s level %u:\n parent: ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + prt_str(&buf, "\n child: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key)); + + if (mustfix_fsck_err(c, btree_node_topology_bad_max_key, + "btree node with incorrect max_key%s", buf.buf)) { + if (b->c.level == 1 && + bpos_lt(*pulled_from_scan, b->key.k.p)) { + ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, + bpos_successor(child->key.k.p), b->key.k.p); + if (ret) + goto err; + + *pulled_from_scan = b->key.k.p; + ret = DID_FILL_FROM_SCAN; + } else { + ret = set_node_max(c, child, b->key.k.p); + } } err: fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); + printbuf_exit(&buf); return ret; } -static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b) +static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b, + struct bpos *pulled_from_scan) { struct bch_fs *c = trans->c; struct btree_and_journal_iter iter; struct bkey_s_c k; struct bkey_buf prev_k, cur_k; struct btree *prev = NULL, *cur = NULL; - bool have_child, dropped_children = false; + bool have_child, new_pass = false; struct printbuf buf = PRINTBUF; int ret = 0; if (!b->c.level) return 0; -again: - prev = NULL; - have_child = dropped_children = false; + bch2_bkey_buf_init(&prev_k); bch2_bkey_buf_init(&cur_k); +again: + cur = prev = NULL; + have_child = new_pass = false; bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; @@ -415,6 +367,28 @@ again: b->c.level - 1, buf.buf)) { bch2_btree_node_evict(trans, cur_k.k); + cur = NULL; + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); + if (ret) + break; + + if (!btree_id_is_alloc(b->c.btree_id)) { + ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); + if (ret) + break; + } + continue; + } + + bch_err_msg(c, ret, "getting btree node"); + if (ret) + break; + + if (bch2_btree_node_is_stale(c, cur)) { + bch_info(c, "btree node %s older than nodes found by scanning", buf.buf); + six_unlock_read(&cur->c.lock); + bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); cur = NULL; @@ -423,11 +397,11 @@ again: continue; } - bch_err_msg(c, ret, "getting btree node"); - if (ret) - break; - - ret = btree_repair_node_boundaries(c, b, prev, cur); + ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan); + if (ret == DID_FILL_FROM_SCAN) { + new_pass = true; + ret = 0; + } if (ret == DROP_THIS_NODE) { six_unlock_read(&cur->c.lock); @@ -445,6 +419,7 @@ again: prev = NULL; if (ret == DROP_PREV_NODE) { + bch_info(c, "dropped prev node"); bch2_btree_node_evict(trans, prev_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, prev_k.k->k.p); @@ -452,8 +427,6 @@ again: break; bch2_btree_and_journal_iter_exit(&iter); - bch2_bkey_buf_exit(&prev_k, c); - bch2_bkey_buf_exit(&cur_k, c); goto again; } else if (ret) break; @@ -465,7 +438,11 @@ again: if (!ret && !IS_ERR_OR_NULL(prev)) { BUG_ON(cur); - ret = btree_repair_node_end(c, b, prev); + ret = btree_repair_node_end(c, b, prev, pulled_from_scan); + if (ret == DID_FILL_FROM_SCAN) { + new_pass = true; + ret = 0; + } } if (!IS_ERR_OR_NULL(prev)) @@ -479,6 +456,10 @@ again: goto err; bch2_btree_and_journal_iter_exit(&iter); + + if (new_pass) + goto again; + bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; @@ -495,7 +476,7 @@ again: if (ret) goto err; - ret = bch2_btree_repair_topology_recurse(trans, cur); + ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan); six_unlock_read(&cur->c.lock); cur = NULL; @@ -503,7 +484,7 @@ again: bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); - dropped_children = true; + new_pass = true; } if (ret) @@ -530,12 +511,14 @@ fsck_err: six_unlock_read(&cur->c.lock); bch2_btree_and_journal_iter_exit(&iter); - bch2_bkey_buf_exit(&prev_k, c); - bch2_bkey_buf_exit(&cur_k, c); - if (!ret && dropped_children) + if (!ret && new_pass) goto again; + BUG_ON(!ret && bch2_btree_node_check_topology(trans, b)); + + bch2_bkey_buf_exit(&prev_k, c); + bch2_bkey_buf_exit(&cur_k, c); printbuf_exit(&buf); return ret; } @@ -543,32 +526,63 @@ fsck_err: int bch2_check_topology(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - struct btree *b; - unsigned i; + struct bpos pulled_from_scan = POS_MIN; int ret = 0; - for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) { + for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { struct btree_root *r = bch2_btree_id_root(c, i); + bool reconstructed_root = false; - if (!r->alive) - continue; + if (r->error) { + ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); + if (ret) + break; +reconstruct_root: + bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i)); - b = r->b; - if (btree_node_fake(b)) - continue; + r->alive = false; + r->error = 0; + + if (!bch2_btree_has_scanned_nodes(c, i)) { + mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", bch2_btree_id_str(i)); + bch2_btree_root_alloc_fake(c, i, 0); + } else { + bch2_btree_root_alloc_fake(c, i, 1); + bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); + if (ret) + break; + } + + reconstructed_root = true; + } + + struct btree *b = r->b; btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); - ret = bch2_btree_repair_topology_recurse(trans, b); + ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); six_unlock_read(&b->c.lock); if (ret == DROP_THIS_NODE) { - bch_err(c, "empty btree root - repair unimplemented"); - ret = -BCH_ERR_fsck_repair_unimplemented; + bch2_btree_node_hash_remove(&c->btree_cache, b); + mutex_lock(&c->btree_cache.lock); + list_move(&b->list, &c->btree_cache.freeable); + mutex_unlock(&c->btree_cache.lock); + + r->b = NULL; + + if (!reconstructed_root) + goto reconstruct_root; + + bch_err(c, "empty btree root %s", bch2_btree_id_str(i)); + bch2_btree_root_alloc_fake(c, i, 0); + r->alive = false; + ret = 0; } } - +fsck_err: bch2_trans_put(trans); - return ret; } @@ -591,7 +605,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) { struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); - enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, p, entry_c); if (fsck_err_on(!g->gen_valid, c, ptr_to_missing_alloc_key, @@ -657,7 +671,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id continue; if (fsck_err_on(bucket_data_type(g->data_type) && - bucket_data_type(g->data_type) != data_type, c, + bucket_data_type(g->data_type) != + bucket_data_type(data_type), c, ptr_bucket_data_type_mismatch, "bucket %u:%zu different types of data in same bucket: %s, %s\n" "while marking %s", @@ -698,18 +713,13 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id } if (do_update) { - struct bkey_ptrs ptrs; - union bch_extent_entry *entry; - struct bch_extent_ptr *ptr; - struct bkey_i *new; - if (is_root) { bch_err(c, "cannot update btree roots yet"); ret = -EINVAL; goto err; } - new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); + struct bkey_i *new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); if (!new) { ret = -BCH_ERR_ENOMEM_gc_repair_key; bch_err_msg(c, ret, "allocating new key"); @@ -724,7 +734,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id * btree node isn't there anymore, the read path will * sort it out: */ - ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); + struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); bkey_for_each_ptr(ptrs, ptr) { struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bucket *g = PTR_GC_BUCKET(ca, ptr); @@ -732,19 +742,26 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id ptr->gen = g->gen; } } else { - bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({ - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket *g = PTR_GC_BUCKET(ca, ptr); - enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr); + struct bkey_ptrs ptrs; + union bch_extent_entry *entry; +restart_drop_ptrs: + ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); + bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) { + struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); + struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry); - (ptr->cached && - (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) || - (!ptr->cached && - gen_cmp(ptr->gen, g->gen) < 0) || - gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX || - (g->data_type && - g->data_type != data_type); - })); + if ((p.ptr.cached && + (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) || + (!p.ptr.cached && + gen_cmp(p.ptr.gen, g->gen) < 0) || + gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX || + (g->data_type && + g->data_type != data_type)) { + bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr); + goto restart_drop_ptrs; + } + } again: ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); bkey_extent_entry_for_each(ptrs, entry) { @@ -774,12 +791,6 @@ found: } } - ret = bch2_journal_key_insert_take(c, btree_id, level, new); - if (ret) { - kfree(new); - goto err; - } - if (level) bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new); @@ -793,6 +804,12 @@ found: bch_info(c, "new key %s", buf.buf); } + ret = bch2_journal_key_insert_take(c, btree_id, level, new); + if (ret) { + kfree(new); + goto err; + } + *k = bkey_i_to_s_c(new); } err: @@ -811,6 +828,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, struct bch_fs *c = trans->c; struct bkey deleted = KEY(0, 0, 0); struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; + struct printbuf buf = PRINTBUF; int ret = 0; deleted.p = k->k->p; @@ -819,10 +837,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, BUG_ON(bch2_journal_seq_verify && k->k->version.lo > atomic64_read(&c->journal.seq)); - ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k); - if (ret) - goto err; - if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c, bkey_version_in_future, "key version number higher than recorded: %llu > %llu", @@ -831,52 +845,57 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, atomic64_set(&c->key_version, k->k->version.lo); } + ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k); + if (ret) + goto err; + + if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k), + c, btree_bitmap_not_marked, + "btree ptr not marked in member info btree allocated bitmap\n %s", + (bch2_bkey_val_to_text(&buf, c, *k), + buf.buf))) { + mutex_lock(&c->sb_lock); + bch2_dev_btree_bitmap_mark(c, *k); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + ret = commit_do(trans, NULL, NULL, 0, - bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); + bch2_key_trigger(trans, btree_id, level, old, + unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); fsck_err: err: + printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial) { - struct bch_fs *c = trans->c; struct btree_node_iter iter; struct bkey unpacked; struct bkey_s_c k; - struct bkey_buf prev, cur; int ret = 0; + ret = bch2_btree_node_check_topology(trans, b); + if (ret) + return ret; + if (!btree_node_type_needs_gc(btree_node_type(b))) return 0; bch2_btree_node_iter_init_from_start(&iter, b); - bch2_bkey_buf_init(&prev); - bch2_bkey_buf_init(&cur); - bkey_init(&prev.k->k); while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) { ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false, &k, initial); if (ret) - break; + return ret; bch2_btree_node_iter_advance(&iter, b); - - if (b->c.level) { - bch2_bkey_buf_reassemble(&cur, c, k); - - ret = bch2_gc_check_topology(c, b, &prev, cur, - bch2_btree_node_iter_end(&iter)); - if (ret) - break; - } } - bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); - return ret; + return 0; } static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id, @@ -925,14 +944,16 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b struct bch_fs *c = trans->c; struct btree_and_journal_iter iter; struct bkey_s_c k; - struct bkey_buf cur, prev; + struct bkey_buf cur; struct printbuf buf = PRINTBUF; int ret = 0; + ret = bch2_btree_node_check_topology(trans, b); + if (ret) + return ret; + bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); - bch2_bkey_buf_init(&prev); bch2_bkey_buf_init(&cur); - bkey_init(&prev.k->k); while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { BUG_ON(bpos_lt(k.k->p, b->data->min_key)); @@ -943,20 +964,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b if (ret) goto fsck_err; - if (b->c.level) { - bch2_bkey_buf_reassemble(&cur, c, k); - k = bkey_i_to_s_c(cur.k); - - bch2_btree_and_journal_iter_advance(&iter); - - ret = bch2_gc_check_topology(c, b, - &prev, cur, - !bch2_btree_and_journal_iter_peek(&iter).k); - if (ret) - goto fsck_err; - } else { - bch2_btree_and_journal_iter_advance(&iter); - } + bch2_btree_and_journal_iter_advance(&iter); } if (b->c.level > target_depth) { @@ -1015,7 +1023,6 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b } fsck_err: bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); bch2_btree_and_journal_iter_exit(&iter); printbuf_exit(&buf); return ret; @@ -1033,9 +1040,6 @@ static int bch2_gc_btree_init(struct btree_trans *trans, b = bch2_btree_id_root(c, btree_id)->b; - if (btree_node_fake(b)) - return 0; - six_lock_read(&b->c.lock, NULL, NULL); printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->data->min_key); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 34df8ccc5fec..9678b2375bed 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -654,6 +654,7 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) */ bch2_bset_set_no_aux_tree(b, b->set); bch2_btree_build_aux_trees(b); + b->nr = bch2_btree_node_count_keys(b); struct bkey_s_c k; struct bkey unpacked; @@ -830,7 +831,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b, (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); } -static bool __bkey_valid(struct bch_fs *c, struct btree *b, +static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, struct bset *i, struct bkey_packed *k) { if (bkey_p_next(k) > vstruct_last(i)) @@ -839,7 +840,7 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b, if (k->format > KEY_FORMAT_CURRENT) return false; - if (k->u64s < bkeyp_key_u64s(&b->format, k)) + if (!bkeyp_u64s_valid(&b->format, k)) return false; struct printbuf buf = PRINTBUF; @@ -883,11 +884,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, "invalid bkey format %u", k->format)) goto drop_this_key; - if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k), + if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, btree_node_bkey_bad_u64s, - "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k))) + "bad k->u64s %u (min %u max %lu)", k->u64s, + bkeyp_key_u64s(&b->format, k), + U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) goto drop_this_key; if (!write) @@ -946,13 +949,12 @@ drop_this_key: * do */ - if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { + if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { for (next_good_key = 1; next_good_key < (u64 *) vstruct_last(i) - (u64 *) k; next_good_key++) - if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) + if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) goto got_good_key; - } /* @@ -1263,10 +1265,12 @@ out: return retry_read; fsck_err: if (ret == -BCH_ERR_btree_node_read_err_want_retry || - ret == -BCH_ERR_btree_node_read_err_must_retry) + ret == -BCH_ERR_btree_node_read_err_must_retry) { retry_read = 1; - else + } else { set_btree_node_read_error(b); + bch2_btree_lost_data(c, b->c.btree_id); + } goto out; } @@ -1327,6 +1331,7 @@ start: if (!can_retry) { set_btree_node_read_error(b); + bch2_btree_lost_data(c, b->c.btree_id); break; } } @@ -1335,7 +1340,9 @@ start: rb->start_time); bio_put(&rb->bio); - if (saw_error && !btree_node_read_error(b)) { + if (saw_error && + !btree_node_read_error(b) && + c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->key.k.p); bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", @@ -1526,9 +1533,10 @@ fsck_err: ret = -1; } - if (ret) + if (ret) { set_btree_node_read_error(b); - else if (*saw_error) + bch2_btree_lost_data(c, b->c.btree_id); + } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); for (i = 0; i < ra->nr; i++) { @@ -1657,13 +1665,14 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); - bch_err(c, "%s", buf.buf); + bch_err_ratelimited(c, "%s", buf.buf); if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) bch2_fatal_error(c); set_btree_node_read_error(b); + bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); @@ -1860,7 +1869,7 @@ static void btree_node_write_work(struct work_struct *work) } else { ret = bch2_trans_do(c, NULL, NULL, 0, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, - BCH_WATERMARK_reclaim| + BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_check_rw, diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 51bcdc6c6d1c..2a211a4bebd1 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -927,8 +927,22 @@ static __always_inline int btree_path_down(struct btree_trans *trans, if (ret) goto err; } else { - bch2_bkey_buf_unpack(&tmp, c, l->b, - bch2_btree_node_iter_peek(&l->iter, l->b)); + struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b); + if (!k) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "node not found at pos "); + bch2_bpos_to_text(&buf, path->pos); + prt_str(&buf, " within parent node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key)); + + bch2_fs_fatal_error(c, "%s", buf.buf); + printbuf_exit(&buf); + ret = -BCH_ERR_btree_need_topology_repair; + goto err; + } + + bch2_bkey_buf_unpack(&tmp, c, l->b, k); if ((flags & BTREE_ITER_PREFETCH) && c->opts.btree_node_prefetch) { @@ -962,7 +976,6 @@ err: return ret; } - static int bch2_btree_path_traverse_all(struct btree_trans *trans) { struct bch_fs *c = trans->c; @@ -2790,6 +2803,31 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) struct btree_transaction_stats *s = btree_trans_stats(trans); s->max_mem = max(s->max_mem, new_bytes); + if (trans->used_mempool) { + if (trans->mem_bytes >= new_bytes) + goto out_change_top; + + /* No more space from mempool item, need malloc new one */ + new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); + if (unlikely(!new_mem)) { + bch2_trans_unlock(trans); + + new_mem = kmalloc(new_bytes, GFP_KERNEL); + if (!new_mem) + return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + + ret = bch2_trans_relock(trans); + if (ret) { + kfree(new_mem); + return ERR_PTR(ret); + } + } + memcpy(new_mem, trans->mem, trans->mem_top); + trans->used_mempool = false; + mempool_free(trans->mem, &c->btree_trans_mem_pool); + goto out_new_mem; + } + new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); if (unlikely(!new_mem)) { bch2_trans_unlock(trans); @@ -2798,6 +2836,8 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); new_bytes = BTREE_TRANS_MEM_MAX; + memcpy(new_mem, trans->mem, trans->mem_top); + trans->used_mempool = true; kfree(trans->mem); } @@ -2811,7 +2851,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (ret) return ERR_PTR(ret); } - +out_new_mem: trans->mem = new_mem; trans->mem_bytes = new_bytes; @@ -2819,7 +2859,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); } - +out_change_top: p = trans->mem + trans->mem_top; trans->mem_top += size; memset(p, 0, size); @@ -3093,7 +3133,7 @@ void bch2_trans_put(struct btree_trans *trans) if (paths_allocated != trans->_paths_allocated) kvfree_rcu_mightsleep(paths_allocated); - if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) + if (trans->used_mempool) mempool_free(trans->mem, &c->btree_trans_mem_pool); else kfree(trans->mem); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 24772538e4cc..1c70836dd7cc 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -498,8 +498,13 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; - if (!trans->restarted) - btree_iter_path(trans, iter)->preserve = false; + if (!iter->path || trans->restarted) + return; + + struct btree_path *path = btree_iter_path(trans, iter); + path->preserve = false; + if (path->ref == 1) + path->should_be_locked = false; } void *__bch2_trans_kmalloc(struct btree_trans *, size_t); @@ -642,7 +647,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *); static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8) + if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8) return __bch2_btree_trans_too_many_iters(trans); return 0; diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 50e04356d72c..1e8cf49a6935 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -130,12 +130,30 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx); } +static void journal_iter_verify(struct journal_iter *iter) +{ + struct journal_keys *keys = iter->keys; + size_t gap_size = keys->size - keys->nr; + + BUG_ON(iter->idx >= keys->gap && + iter->idx < keys->gap + gap_size); + + if (iter->idx < keys->size) { + struct journal_key *k = keys->data + iter->idx; + + int cmp = cmp_int(k->btree_id, iter->btree_id) ?: + cmp_int(k->level, iter->level); + BUG_ON(cmp < 0); + } +} + static void journal_iters_fix(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; /* The key we just inserted is immediately before the gap: */ size_t gap_end = keys->gap + (keys->size - keys->nr); - struct btree_and_journal_iter *iter; + struct journal_key *new_key = &keys->data[keys->gap - 1]; + struct journal_iter *iter; /* * If an iterator points one after the key we just inserted, decrement @@ -143,9 +161,14 @@ static void journal_iters_fix(struct bch_fs *c) * decrement was unnecessary, bch2_btree_and_journal_iter_peek() will * handle that: */ - list_for_each_entry(iter, &c->journal_iters, journal.list) - if (iter->journal.idx == gap_end) - iter->journal.idx = keys->gap - 1; + list_for_each_entry(iter, &c->journal_iters, list) { + journal_iter_verify(iter); + if (iter->idx == gap_end && + new_key->btree_id == iter->btree_id && + new_key->level == iter->level) + iter->idx = keys->gap - 1; + journal_iter_verify(iter); + } } static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap) @@ -192,7 +215,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, if (idx > keys->gap) idx -= keys->size - keys->nr; + size_t old_gap = keys->gap; + if (keys->nr == keys->size) { + journal_iters_move_gap(c, old_gap, keys->size); + old_gap = keys->size; + struct journal_keys new_keys = { .nr = keys->nr, .size = max_t(size_t, keys->size, 8) * 2, @@ -216,7 +244,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, keys->gap = keys->nr; } - journal_iters_move_gap(c, keys->gap, idx); + journal_iters_move_gap(c, old_gap, idx); move_gap(keys, idx); @@ -261,6 +289,22 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, return bch2_journal_key_insert(c, id, level, &whiteout); } +bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree, + unsigned level, struct bpos pos) +{ + struct journal_keys *keys = &trans->c->journal_keys; + size_t idx = bch2_journal_key_search(keys, btree, level, pos); + + if (!trans->journal_replay_not_finished) + return false; + + return (idx < keys->size && + keys->data[idx].btree_id == btree && + keys->data[idx].level == level && + bpos_eq(keys->data[idx].k->k.p, pos) && + bkey_deleted(&keys->data[idx].k->k)); +} + void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, unsigned level, struct bpos pos) { @@ -285,16 +329,21 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) { - struct journal_key *k = iter->keys->data + iter->idx; + journal_iter_verify(iter); + + while (iter->idx < iter->keys->size) { + struct journal_key *k = iter->keys->data + iter->idx; + + int cmp = cmp_int(k->btree_id, iter->btree_id) ?: + cmp_int(k->level, iter->level); + if (cmp > 0) + break; + BUG_ON(cmp); - while (k < iter->keys->data + iter->keys->size && - k->btree_id == iter->btree_id && - k->level == iter->level) { if (!k->overwritten) return bkey_i_to_s_c(k->k); bch2_journal_iter_advance(iter); - k = iter->keys->data + iter->idx; } return bkey_s_c_null; @@ -314,6 +363,8 @@ static void bch2_journal_iter_init(struct bch_fs *c, iter->level = level; iter->keys = &c->journal_keys; iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos); + + journal_iter_verify(iter); } static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) @@ -363,7 +414,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) { - struct bkey_s_c btree_k, journal_k, ret; + struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret; if (iter->prefetch && iter->journal.level) btree_and_journal_iter_prefetch(iter); @@ -375,9 +426,10 @@ again: bpos_lt(btree_k.k->p, iter->pos)) bch2_journal_iter_advance_btree(iter); - while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k && - bpos_lt(journal_k.k->p, iter->pos)) - bch2_journal_iter_advance(&iter->journal); + if (iter->trans->journal_replay_not_finished) + while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k && + bpos_lt(journal_k.k->p, iter->pos)) + bch2_journal_iter_advance(&iter->journal); ret = journal_k.k && (!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p)) @@ -417,10 +469,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, iter->trans = trans; iter->b = b; iter->node_iter = node_iter; - bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos); - INIT_LIST_HEAD(&iter->journal.list); iter->pos = b->data->min_key; iter->at_end = false; + INIT_LIST_HEAD(&iter->journal.list); + + if (trans->journal_replay_not_finished) { + bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos); + if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags)) + list_add(&iter->journal.list, &trans->c->journal_iters); + } } /* @@ -435,7 +492,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, bch2_btree_node_iter_init_from_start(&node_iter, b); __bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key); - list_add(&iter->journal.list, &trans->c->journal_iters); } /* sort and dedup all keys in the journal: */ @@ -548,3 +604,22 @@ int bch2_journal_keys_sort(struct bch_fs *c) bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr); return 0; } + +void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree, + unsigned level_min, unsigned level_max, + struct bpos start, struct bpos end) +{ + struct journal_keys *keys = &c->journal_keys; + size_t dst = 0; + + move_gap(keys, keys->nr); + + darray_for_each(*keys, i) + if (!(i->btree_id == btree && + i->level >= level_min && + i->level <= level_max && + bpos_ge(i->k->k.p, start) && + bpos_le(i->k->k.p, end))) + keys->data[dst++] = *i; + keys->nr = keys->gap = dst; +} diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h index c9d19da3ea04..af25046ebcaa 100644 --- a/fs/bcachefs/btree_journal_iter.h +++ b/fs/bcachefs/btree_journal_iter.h @@ -40,8 +40,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id, unsigned, struct bkey_i *); int bch2_journal_key_delete(struct bch_fs *, enum btree_id, unsigned, struct bpos); -void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, - unsigned, struct bpos); +bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, struct bpos); +void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos); void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); @@ -66,4 +66,8 @@ void bch2_journal_entries_free(struct bch_fs *); int bch2_journal_keys_sort(struct bch_fs *); +void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id, + unsigned, unsigned, + struct bpos, struct bpos); + #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */ diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 581edcb0911b..88a3582a3275 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -169,6 +169,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, } else { mutex_lock(&bc->lock); list_move_tail(&ck->list, &bc->freed_pcpu); + bc->nr_freed_pcpu++; mutex_unlock(&bc->lock); } } @@ -245,6 +246,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, if (!list_empty(&bc->freed_pcpu)) { ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list); list_del_init(&ck->list); + bc->nr_freed_pcpu--; } mutex_unlock(&bc->lock); } @@ -659,7 +661,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, commit_flags |= BCH_WATERMARK_reclaim; if (ck->journal.seq != journal_last_seq(j) || - j->watermark == BCH_WATERMARK_stripe) + !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) commit_flags |= BCH_TRANS_COMMIT_no_journal_res; ret = bch2_btree_iter_traverse(&b_iter) ?: diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index b9b151e693ed..f2caf491957e 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -440,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, struct btree_path *path, struct btree_bkey_cached_common *b) { - struct btree_path *linked; - unsigned i, iter; - int ret; - - /* - * XXX BIG FAT NOTICE - * - * Drop all read locks before taking a write lock: - * - * This is a hack, because bch2_btree_node_lock_write_nofail() is a - * hack - but by dropping read locks first, this should never fail, and - * we only use this in code paths where whatever read locks we've - * already taken are no longer needed: - */ - - trans_for_each_path(trans, linked, iter) { - if (!linked->nodes_locked) - continue; - - for (i = 0; i < BTREE_MAX_DEPTH; i++) - if (btree_node_read_locked(linked, i)) { - btree_node_unlock(trans, linked, i); - btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK); - } - } - - ret = __btree_node_lock_write(trans, path, b, true); + int ret = __btree_node_lock_write(trans, path, b, true); BUG_ON(ret); } diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c new file mode 100644 index 000000000000..866bd278439f --- /dev/null +++ b/fs/bcachefs/btree_node_scan.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "btree_cache.h" +#include "btree_io.h" +#include "btree_journal_iter.h" +#include "btree_node_scan.h" +#include "btree_update_interior.h" +#include "buckets.h" +#include "error.h" +#include "journal_io.h" +#include "recovery_passes.h" + +#include +#include + +struct find_btree_nodes_worker { + struct closure *cl; + struct find_btree_nodes *f; + struct bch_dev *ca; +}; + +static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n) +{ + prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie); + bch2_bpos_to_text(out, n->min_key); + prt_str(out, "-"); + bch2_bpos_to_text(out, n->max_key); + + if (n->range_updated) + prt_str(out, " range updated"); + if (n->overwritten) + prt_str(out, " overwritten"); + + for (unsigned i = 0; i < n->nr_ptrs; i++) { + prt_char(out, ' '); + bch2_extent_ptr_to_text(out, c, n->ptrs + i); + } +} + +static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes) +{ + printbuf_indent_add(out, 2); + darray_for_each(nodes, i) { + found_btree_node_to_text(out, c, i); + prt_newline(out); + } + printbuf_indent_sub(out, 2); +} + +static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f) +{ + struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k); + + set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs); + bp->k.p = f->max_key; + bp->v.seq = cpu_to_le64(f->cookie); + bp->v.sectors_written = 0; + bp->v.flags = 0; + bp->v.min_key = f->min_key; + SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated); + memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs); +} + +static bool found_btree_node_is_readable(struct btree_trans *trans, + const struct found_btree_node *f) +{ + struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k; + + found_btree_node_to_key(&k.k, f); + + struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false); + bool ret = !IS_ERR_OR_NULL(b); + if (ret) + six_unlock_read(&b->c.lock); + + /* + * We might update this node's range; if that happens, we need the node + * to be re-read so the read path can trim keys that are no longer in + * this node + */ + if (b != btree_node_root(trans->c, b)) + bch2_btree_node_evict(trans, &k.k); + return ret; +} + +static int found_btree_node_cmp_cookie(const void *_l, const void *_r) +{ + const struct found_btree_node *l = _l; + const struct found_btree_node *r = _r; + + return cmp_int(l->btree_id, r->btree_id) ?: + cmp_int(l->level, r->level) ?: + cmp_int(l->cookie, r->cookie); +} + +/* + * Given two found btree nodes, if their sequence numbers are equal, take the + * one that's readable: + */ +static int found_btree_node_cmp_time(const struct found_btree_node *l, + const struct found_btree_node *r) +{ + return cmp_int(l->seq, r->seq); +} + +static int found_btree_node_cmp_pos(const void *_l, const void *_r) +{ + const struct found_btree_node *l = _l; + const struct found_btree_node *r = _r; + + return cmp_int(l->btree_id, r->btree_id) ?: + -cmp_int(l->level, r->level) ?: + bpos_cmp(l->min_key, r->min_key) ?: + -found_btree_node_cmp_time(l, r); +} + +static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, + struct bio *bio, struct btree_node *bn, u64 offset) +{ + struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes); + + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, bn, PAGE_SIZE); + + submit_bio_wait(bio); + if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, + "IO error in try_read_btree_node() at %llu: %s", + offset, bch2_blk_status_to_str(bio->bi_status))) + return; + + if (le64_to_cpu(bn->magic) != bset_magic(c)) + return; + + if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) { + struct nonce nonce = btree_nonce(&bn->keys, 0); + unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; + + bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes); + } + + if (btree_id_is_alloc(BTREE_NODE_ID(bn))) + return; + + if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH) + return; + + rcu_read_lock(); + struct found_btree_node n = { + .btree_id = BTREE_NODE_ID(bn), + .level = BTREE_NODE_LEVEL(bn), + .seq = BTREE_NODE_SEQ(bn), + .cookie = le64_to_cpu(bn->keys.seq), + .min_key = bn->min_key, + .max_key = bn->max_key, + .nr_ptrs = 1, + .ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr, + .ptrs[0].offset = offset, + .ptrs[0].dev = ca->dev_idx, + .ptrs[0].gen = *bucket_gen(ca, sector_to_bucket(ca, offset)), + }; + rcu_read_unlock(); + + if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) { + mutex_lock(&f->lock); + if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) { + bch_err(c, "try_read_btree_node() can't handle endian conversion"); + f->ret = -EINVAL; + goto unlock; + } + + if (darray_push(&f->nodes, n)) + f->ret = -ENOMEM; +unlock: + mutex_unlock(&f->lock); + } +} + +static int read_btree_nodes_worker(void *p) +{ + struct find_btree_nodes_worker *w = p; + struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes); + struct bch_dev *ca = w->ca; + void *buf = (void *) __get_free_page(GFP_KERNEL); + struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL); + unsigned long last_print = jiffies; + + if (!buf || !bio) { + bch_err(c, "read_btree_nodes_worker: error allocating bio/buf"); + w->f->ret = -ENOMEM; + goto err; + } + + for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++) + for (unsigned bucket_offset = 0; + bucket_offset + btree_sectors(c) <= ca->mi.bucket_size; + bucket_offset += btree_sectors(c)) { + if (time_after(jiffies, last_print + HZ * 30)) { + u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset; + u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size; + + bch_info(ca, "%s: %2u%% done", __func__, + (unsigned) div64_u64(cur_sector * 100, end_sector)); + last_print = jiffies; + } + + u64 sector = bucket * ca->mi.bucket_size + bucket_offset; + + if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap && + !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c))) + continue; + + try_read_btree_node(w->f, ca, bio, buf, sector); + } +err: + bio_put(bio); + free_page((unsigned long) buf); + percpu_ref_get(&ca->io_ref); + closure_put(w->cl); + kfree(w); + return 0; +} + +static int read_btree_nodes(struct find_btree_nodes *f) +{ + struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes); + struct closure cl; + int ret = 0; + + closure_init_stack(&cl); + + for_each_online_member(c, ca) { + if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree))) + continue; + + struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL); + struct task_struct *t; + + if (!w) { + percpu_ref_put(&ca->io_ref); + ret = -ENOMEM; + goto err; + } + + percpu_ref_get(&ca->io_ref); + closure_get(&cl); + w->cl = &cl; + w->f = f; + w->ca = ca; + + t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name); + ret = IS_ERR_OR_NULL(t); + if (ret) { + percpu_ref_put(&ca->io_ref); + closure_put(&cl); + f->ret = ret; + bch_err(c, "error starting kthread: %i", ret); + break; + } + } +err: + closure_sync(&cl); + return f->ret ?: ret; +} + +static void bubble_up(struct found_btree_node *n, struct found_btree_node *end) +{ + while (n + 1 < end && + found_btree_node_cmp_pos(n, n + 1) > 0) { + swap(n[0], n[1]); + n++; + } +} + +static int handle_overwrites(struct bch_fs *c, + struct found_btree_node *start, + struct found_btree_node *end) +{ + struct found_btree_node *n; +again: + for (n = start + 1; + n < end && + n->btree_id == start->btree_id && + n->level == start->level && + bpos_lt(n->min_key, start->max_key); + n++) { + int cmp = found_btree_node_cmp_time(start, n); + + if (cmp > 0) { + if (bpos_cmp(start->max_key, n->max_key) >= 0) + n->overwritten = true; + else { + n->range_updated = true; + n->min_key = bpos_successor(start->max_key); + n->range_updated = true; + bubble_up(n, end); + goto again; + } + } else if (cmp < 0) { + BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0); + + start->max_key = bpos_predecessor(n->min_key); + start->range_updated = true; + } else { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "overlapping btree nodes with same seq! halting\n "); + found_btree_node_to_text(&buf, c, start); + prt_str(&buf, "\n "); + found_btree_node_to_text(&buf, c, n); + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + return -BCH_ERR_fsck_repair_unimplemented; + } + } + + return 0; +} + +int bch2_scan_for_btree_nodes(struct bch_fs *c) +{ + struct find_btree_nodes *f = &c->found_btree_nodes; + struct printbuf buf = PRINTBUF; + size_t dst; + int ret = 0; + + if (f->nodes.nr) + return 0; + + mutex_init(&f->lock); + + ret = read_btree_nodes(f); + if (ret) + return ret; + + if (!f->nodes.nr) { + bch_err(c, "%s: no btree nodes found", __func__); + ret = -EINVAL; + goto err; + } + + if (0 && c->opts.verbose) { + printbuf_reset(&buf); + prt_printf(&buf, "%s: nodes found:\n", __func__); + found_btree_nodes_to_text(&buf, c, f->nodes); + bch2_print_string_as_lines(KERN_INFO, buf.buf); + } + + sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL); + + dst = 0; + darray_for_each(f->nodes, i) { + struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL; + + if (prev && + prev->cookie == i->cookie) { + if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) { + bch_err(c, "%s: found too many replicas for btree node", __func__); + ret = -EINVAL; + goto err; + } + prev->ptrs[prev->nr_ptrs++] = i->ptrs[0]; + } else { + f->nodes.data[dst++] = *i; + } + } + f->nodes.nr = dst; + + sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); + + if (0 && c->opts.verbose) { + printbuf_reset(&buf); + prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__); + found_btree_nodes_to_text(&buf, c, f->nodes); + bch2_print_string_as_lines(KERN_INFO, buf.buf); + } + + dst = 0; + darray_for_each(f->nodes, i) { + if (i->overwritten) + continue; + + ret = handle_overwrites(c, i, &darray_top(f->nodes)); + if (ret) + goto err; + + BUG_ON(i->overwritten); + f->nodes.data[dst++] = *i; + } + f->nodes.nr = dst; + + if (c->opts.verbose) { + printbuf_reset(&buf); + prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__); + found_btree_nodes_to_text(&buf, c, f->nodes); + bch2_print_string_as_lines(KERN_INFO, buf.buf); + } + + eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); +err: + printbuf_exit(&buf); + return ret; +} + +static int found_btree_node_range_start_cmp(const void *_l, const void *_r) +{ + const struct found_btree_node *l = _l; + const struct found_btree_node *r = _r; + + return cmp_int(l->btree_id, r->btree_id) ?: + -cmp_int(l->level, r->level) ?: + bpos_cmp(l->max_key, r->min_key); +} + +#define for_each_found_btree_node_in_range(_f, _search, _idx) \ + for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr, \ + sizeof((_f)->nodes.data[0]), \ + found_btree_node_range_start_cmp, &search); \ + _idx < (_f)->nodes.nr && \ + (_f)->nodes.data[_idx].btree_id == _search.btree_id && \ + (_f)->nodes.data[_idx].level == _search.level && \ + bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key); \ + _idx = eytzinger0_next(_idx, (_f)->nodes.nr)) + +bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b) +{ + struct find_btree_nodes *f = &c->found_btree_nodes; + + struct found_btree_node search = { + .btree_id = b->c.btree_id, + .level = b->c.level, + .min_key = b->data->min_key, + .max_key = b->key.k.p, + }; + + for_each_found_btree_node_in_range(f, search, idx) + if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data)) + return true; + return false; +} + +bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) +{ + struct found_btree_node search = { + .btree_id = btree, + .level = 0, + .min_key = POS_MIN, + .max_key = SPOS_MAX, + }; + + for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx) + return true; + return false; +} + +int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, + unsigned level, struct bpos node_min, struct bpos node_max) +{ + if (btree_id_is_alloc(btree)) + return 0; + + struct find_btree_nodes *f = &c->found_btree_nodes; + + int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); + if (ret) + return ret; + + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level); + bch2_bpos_to_text(&buf, node_min); + prt_str(&buf, " - "); + bch2_bpos_to_text(&buf, node_max); + + bch_info(c, "%s(): %s", __func__, buf.buf); + printbuf_exit(&buf); + } + + struct found_btree_node search = { + .btree_id = btree, + .level = level, + .min_key = node_min, + .max_key = node_max, + }; + + for_each_found_btree_node_in_range(f, search, idx) { + struct found_btree_node n = f->nodes.data[idx]; + + n.range_updated |= bpos_lt(n.min_key, node_min); + n.min_key = bpos_max(n.min_key, node_min); + + n.range_updated |= bpos_gt(n.max_key, node_max); + n.max_key = bpos_min(n.max_key, node_max); + + struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp; + + found_btree_node_to_key(&tmp.k, &n); + + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k)); + bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); + printbuf_exit(&buf); + + BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + + ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); + if (ret) + return ret; + } + + return 0; +} + +void bch2_find_btree_nodes_exit(struct find_btree_nodes *f) +{ + darray_exit(&f->nodes); +} diff --git a/fs/bcachefs/btree_node_scan.h b/fs/bcachefs/btree_node_scan.h new file mode 100644 index 000000000000..08687b209787 --- /dev/null +++ b/fs/bcachefs/btree_node_scan.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_NODE_SCAN_H +#define _BCACHEFS_BTREE_NODE_SCAN_H + +int bch2_scan_for_btree_nodes(struct bch_fs *); +bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *); +bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id); +int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos); +void bch2_find_btree_nodes_exit(struct find_btree_nodes *); + +#endif /* _BCACHEFS_BTREE_NODE_SCAN_H */ diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h new file mode 100644 index 000000000000..abb7b27d556a --- /dev/null +++ b/fs/bcachefs/btree_node_scan_types.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H +#define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H + +#include "darray.h" + +struct found_btree_node { + bool range_updated:1; + bool overwritten:1; + u8 btree_id; + u8 level; + u32 seq; + u64 cookie; + + struct bpos min_key; + struct bpos max_key; + + unsigned nr_ptrs; + struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX]; +}; + +typedef DARRAY(struct found_btree_node) found_btree_nodes; + +struct find_btree_nodes { + int ret; + struct mutex lock; + found_btree_nodes nodes; +}; + +#endif /* _BCACHEFS_BTREE_NODE_SCAN_TYPES_H */ diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 30d69a6d133e..bbec91e8e650 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -318,7 +318,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && i->k->k.p.snapshot && - bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot)); + bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot) > 0); } static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, @@ -397,12 +397,13 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags struct bkey_cached *ck = (void *) path->l[0].b; unsigned new_u64s; struct bkey_i *new_k; + unsigned watermark = flags & BCH_WATERMARK_MASK; EBUG_ON(path->level); - if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && - bch2_btree_key_cache_must_wait(c) && - !(flags & BCH_TRANS_COMMIT_journal_reclaim)) + if (watermark < BCH_WATERMARK_reclaim && + !test_bit(BKEY_CACHED_DIRTY, &ck->flags) && + bch2_btree_key_cache_must_wait(c)) return -BCH_ERR_btree_insert_need_journal_reclaim; /* @@ -499,9 +500,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ } static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, - struct btree_insert_entry *btree_id_start) + unsigned btree_id_start) { - struct btree_insert_entry *i; bool trans_trigger_run; int ret, overwrite; @@ -514,13 +514,13 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, do { trans_trigger_run = false; - for (i = btree_id_start; - i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; + for (unsigned i = btree_id_start; + i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; i++) { - if (i->btree_id != btree_id) + if (trans->updates[i].btree_id != btree_id) continue; - ret = run_one_trans_trigger(trans, i, overwrite); + ret = run_one_trans_trigger(trans, trans->updates + i, overwrite); if (ret < 0) return ret; if (ret) @@ -534,8 +534,7 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, static int bch2_trans_commit_run_triggers(struct btree_trans *trans) { - struct btree_insert_entry *btree_id_start = trans->updates; - unsigned btree_id = 0; + unsigned btree_id = 0, btree_id_start = 0; int ret = 0; /* @@ -549,8 +548,8 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) if (btree_id == BTREE_ID_alloc) continue; - while (btree_id_start < trans->updates + trans->nr_updates && - btree_id_start->btree_id < btree_id) + while (btree_id_start < trans->nr_updates && + trans->updates[btree_id_start].btree_id < btree_id) btree_id_start++; ret = run_btree_triggers(trans, btree_id, btree_id_start); @@ -558,11 +557,13 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) return ret; } - trans_for_each_update(trans, i) { + for (unsigned idx = 0; idx < trans->nr_updates; idx++) { + struct btree_insert_entry *i = trans->updates + idx; + if (i->btree_id > BTREE_ID_alloc) break; if (i->btree_id == BTREE_ID_alloc) { - ret = run_btree_triggers(trans, BTREE_ID_alloc, i); + ret = run_btree_triggers(trans, BTREE_ID_alloc, idx); if (ret) return ret; break; @@ -826,7 +827,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags struct bch_fs *c = trans->c; int ret = 0, u64s_delta = 0; - trans_for_each_update(trans, i) { + for (unsigned idx = 0; idx < trans->nr_updates; idx++) { + struct btree_insert_entry *i = trans->updates + idx; if (i->cached) continue; @@ -887,6 +889,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, int ret, unsigned long trace_ip) { struct bch_fs *c = trans->c; + enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; switch (ret) { case -BCH_ERR_btree_insert_btree_node_full: @@ -905,7 +908,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, * flag */ if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && - (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) { + watermark < BCH_WATERMARK_reclaim) { ret = -BCH_ERR_journal_reclaim_would_deadlock; break; } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 9404d96c38f3..e0c982a4195c 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -364,7 +364,21 @@ struct btree_insert_entry { unsigned long ip_allocated; }; +/* Number of btree paths we preallocate, usually enough */ #define BTREE_ITER_INITIAL 64 +/* + * Lmiit for btree_trans_too_many_iters(); this is enough that almost all code + * paths should run inside this limit, and if they don't it usually indicates a + * bug (leaking/duplicated btree paths). + * + * exception: some fsck paths + * + * bugs with excessive path usage seem to have possibly been eliminated now, so + * we might consider eliminating this (and btree_trans_too_many_iter()) at some + * point. + */ +#define BTREE_ITER_NORMAL_LIMIT 256 +/* never exceed limit */ #define BTREE_ITER_MAX (1U << 10) struct btree_trans_commit_hook; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index a4b40c1656a5..8e47e260eba5 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -38,6 +38,9 @@ static noinline int extent_front_merge(struct btree_trans *trans, struct bkey_i *update; int ret; + if (unlikely(trans->journal_replay_not_finished)) + return 0; + update = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(update); if (ret) @@ -69,6 +72,9 @@ static noinline int extent_back_merge(struct btree_trans *trans, struct bch_fs *c = trans->c; int ret; + if (unlikely(trans->journal_replay_not_finished)) + return 0; + ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?: bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p); if (ret < 0) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index b2f5f2e50f7e..6030c396754f 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_buf.h" #include "bkey_methods.h" #include "btree_cache.h" #include "btree_gc.h" @@ -18,12 +19,21 @@ #include "journal.h" #include "journal_reclaim.h" #include "keylist.h" +#include "recovery_passes.h" #include "replicas.h" +#include "sb-members.h" #include "super-io.h" #include "trace.h" #include +static const char * const bch2_btree_update_modes[] = { +#define x(t) #t, + BTREE_UPDATE_MODES() +#undef x + NULL +}; + static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, btree_path_idx_t, struct btree *, struct keylist *); static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); @@ -44,56 +54,103 @@ static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans, return path_idx; } -/* Debug code: */ - /* * Verify that child nodes correctly span parent node's range: */ -static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) +int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) { -#ifdef CONFIG_BCACHEFS_DEBUG - struct bpos next_node = b->data->min_key; - struct btree_node_iter iter; + struct bch_fs *c = trans->c; + struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2 + ? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key + : b->data->min_key; + struct btree_and_journal_iter iter; struct bkey_s_c k; - struct bkey_s_c_btree_ptr_v2 bp; - struct bkey unpacked; - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; + struct printbuf buf = PRINTBUF; + struct bkey_buf prev; + int ret = 0; - BUG_ON(!b->c.level); + BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && + !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, + b->data->min_key)); - if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) - return; + if (!b->c.level) + return 0; - bch2_btree_node_iter_init_from_start(&iter, b); + bch2_bkey_buf_init(&prev); + bkey_init(&prev.k->k); + bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); - while (1) { - k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked); + while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { if (k.k->type != KEY_TYPE_btree_ptr_v2) - break; - bp = bkey_s_c_to_btree_ptr_v2(k); + goto out; - if (!bpos_eq(next_node, bp.v->min_key)) { - bch2_dump_btree_node(c, b); - bch2_bpos_to_text(&buf1, next_node); - bch2_bpos_to_text(&buf2, bp.v->min_key); - panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf); + struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); + + struct bpos expected_min = bkey_deleted(&prev.k->k) + ? node_min + : bpos_successor(prev.k->k.p); + + if (!bpos_eq(expected_min, bp.v->min_key)) { + bch2_topology_error(c); + + printbuf_reset(&buf); + prt_str(&buf, "end of prev node doesn't match start of next node\n"), + prt_printf(&buf, " in btree %s level %u node ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, "\n prev "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); + prt_str(&buf, "\n next "); + bch2_bkey_val_to_text(&buf, c, k); + + need_fsck_err(c, btree_node_topology_bad_min_key, "%s", buf.buf); + goto topology_repair; } - bch2_btree_node_iter_advance(&iter, b); - - if (bch2_btree_node_iter_end(&iter)) { - if (!bpos_eq(k.k->p, b->key.k.p)) { - bch2_dump_btree_node(c, b); - bch2_bpos_to_text(&buf1, b->key.k.p); - bch2_bpos_to_text(&buf2, k.k->p); - panic("expected end %s got %s\n", buf1.buf, buf2.buf); - } - break; - } - - next_node = bpos_successor(k.k->p); + bch2_bkey_buf_reassemble(&prev, c, k); + bch2_btree_and_journal_iter_advance(&iter); } -#endif + + if (bkey_deleted(&prev.k->k)) { + bch2_topology_error(c); + + printbuf_reset(&buf); + prt_str(&buf, "empty interior node\n"); + prt_printf(&buf, " in btree %s level %u node ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + need_fsck_err(c, btree_node_topology_empty_interior_node, "%s", buf.buf); + goto topology_repair; + } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) { + bch2_topology_error(c); + + printbuf_reset(&buf); + prt_str(&buf, "last child node doesn't end at end of parent node\n"); + prt_printf(&buf, " in btree %s level %u node ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, "\n last key "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); + + need_fsck_err(c, btree_node_topology_bad_max_key, "%s", buf.buf); + goto topology_repair; + } +out: +fsck_err: + bch2_btree_and_journal_iter_exit(&iter); + bch2_bkey_buf_exit(&prev, c); + printbuf_exit(&buf); + return ret; +topology_repair: + if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) && + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) { + bch2_inconsistent_error(c); + ret = -BCH_ERR_btree_need_topology_repair; + } else { + ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); + } + goto out; } /* Calculate ideal packed bkey format for new btree nodes: */ @@ -254,7 +311,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct open_buckets obs = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; - unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim + unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim ? BTREE_NODE_RESERVE : 0; int ret; @@ -549,6 +606,26 @@ static void btree_update_add_key(struct btree_update *as, bch2_keylist_push(keys); } +static bool btree_update_new_nodes_marked_sb(struct btree_update *as) +{ + for_each_keylist_key(&as->new_keys, k) + if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k))) + return false; + return true; +} + +static void btree_update_new_nodes_mark_sb(struct btree_update *as) +{ + struct bch_fs *c = as->c; + + mutex_lock(&c->sb_lock); + for_each_keylist_key(&as->new_keys, k) + bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k)); + + bch2_write_super(c); + mutex_unlock(&c->sb_lock); +} + /* * The transactional part of an interior btree node update, where we journal the * update we did to the interior node and update alloc info: @@ -606,6 +683,9 @@ static void btree_update_nodes_written(struct btree_update *as) if (ret) goto err; + if (!btree_update_new_nodes_marked_sb(as)) + btree_update_new_nodes_mark_sb(as); + /* * Wait for any in flight writes to finish before we free the old nodes * on disk: @@ -638,7 +718,7 @@ static void btree_update_nodes_written(struct btree_update *as) * which may require allocations as well. */ ret = commit_do(trans, &as->disk_res, &journal_seq, - BCH_WATERMARK_reclaim| + BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_journal_reclaim, @@ -648,9 +728,13 @@ static void btree_update_nodes_written(struct btree_update *as) bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, "%s", bch2_err_str(ret)); err: - if (as->b) { - - b = as->b; + /* + * We have to be careful because another thread might be getting ready + * to free as->b and calling btree_update_reparent() on us - we'll + * recheck under btree_update_lock below: + */ + b = READ_ONCE(as->b); + if (b) { btree_path_idx_t path_idx = get_unlocked_mut_path(trans, as->btree_id, b->c.level, b->key.k.p); struct btree_path *path = trans->paths + path_idx; @@ -794,15 +878,17 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; - mutex_lock(&c->btree_interior_update_lock); - list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - - BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); + BUG_ON(as->mode != BTREE_UPDATE_none); + BUG_ON(as->update_level_end < b->c.level); BUG_ON(!btree_node_dirty(b)); BUG_ON(!b->c.level); - as->mode = BTREE_INTERIOR_UPDATING_NODE; + mutex_lock(&c->btree_interior_update_lock); + list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); + + as->mode = BTREE_UPDATE_node; as->b = b; + as->update_level_end = b->c.level; set_btree_node_write_blocked(b); list_add(&as->write_blocked_list, &b->write_blocked); @@ -824,7 +910,7 @@ static void btree_update_reparent(struct btree_update *as, lockdep_assert_held(&c->btree_interior_update_lock); child->b = NULL; - child->mode = BTREE_INTERIOR_UPDATING_AS; + child->mode = BTREE_UPDATE_update; bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, bch2_update_reparent_journal_pin_flush); @@ -835,7 +921,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) struct bkey_i *insert = &b->key; struct bch_fs *c = as->c; - BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); + BUG_ON(as->mode != BTREE_UPDATE_none); BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > ARRAY_SIZE(as->journal_entries)); @@ -849,7 +935,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) mutex_lock(&c->btree_interior_update_lock); list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - as->mode = BTREE_INTERIOR_UPDATING_ROOT; + as->mode = BTREE_UPDATE_root; mutex_unlock(&c->btree_interior_update_lock); } @@ -1027,7 +1113,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * struct bch_fs *c = as->c; u64 start_time = as->start_time; - BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); + BUG_ON(as->mode == BTREE_UPDATE_none); if (as->took_gc_lock) up_read(&as->c->gc_lock); @@ -1044,7 +1130,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * static struct btree_update * bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, - unsigned level, bool split, unsigned flags) + unsigned level_start, bool split, unsigned flags) { struct bch_fs *c = trans->c; struct btree_update *as; @@ -1052,7 +1138,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc) ? BCH_DISK_RESERVATION_NOFAIL : 0; unsigned nr_nodes[2] = { 0, 0 }; - unsigned update_level = level; + unsigned level_end = level_start; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; int ret = 0; u32 restart_count = trans->restart_count; @@ -1067,34 +1153,30 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, flags &= ~BCH_WATERMARK_MASK; flags |= watermark; - if (watermark < c->journal.watermark) { - struct journal_res res = { 0 }; - unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK; + if (watermark < BCH_WATERMARK_reclaim && + test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) { + if (flags & BCH_TRANS_COMMIT_journal_reclaim) + return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock); - if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && - watermark != BCH_WATERMARK_reclaim) - journal_flags |= JOURNAL_RES_GET_NONBLOCK; - - ret = drop_locks_do(trans, - bch2_journal_res_get(&c->journal, &res, 1, journal_flags)); - if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) - ret = -BCH_ERR_journal_reclaim_would_deadlock; + bch2_trans_unlock(trans); + wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)); + ret = bch2_trans_relock(trans); if (ret) return ERR_PTR(ret); } while (1) { - nr_nodes[!!update_level] += 1 + split; - update_level++; + nr_nodes[!!level_end] += 1 + split; + level_end++; - ret = bch2_btree_path_upgrade(trans, path, update_level + 1); + ret = bch2_btree_path_upgrade(trans, path, level_end + 1); if (ret) return ERR_PTR(ret); - if (!btree_path_node(path, update_level)) { + if (!btree_path_node(path, level_end)) { /* Allocating new root? */ nr_nodes[1] += split; - update_level = BTREE_MAX_DEPTH; + level_end = BTREE_MAX_DEPTH; break; } @@ -1102,11 +1184,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, * Always check for space for two keys, even if we won't have to * split at prior level - it might have been a merge instead: */ - if (bch2_btree_node_insert_fits(path->l[update_level].b, + if (bch2_btree_node_insert_fits(path->l[level_end].b, BKEY_BTREE_PTR_U64s_MAX * 2)) break; - split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c); + split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c); } if (!down_read_trylock(&c->gc_lock)) { @@ -1120,13 +1202,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS); memset(as, 0, sizeof(*as)); closure_init(&as->cl, NULL); - as->c = c; - as->start_time = start_time; - as->ip_started = _RET_IP_; - as->mode = BTREE_INTERIOR_NO_UPDATE; - as->took_gc_lock = true; - as->btree_id = path->btree_id; - as->update_level = update_level; + as->c = c; + as->start_time = start_time; + as->ip_started = _RET_IP_; + as->mode = BTREE_UPDATE_none; + as->watermark = watermark; + as->took_gc_lock = true; + as->btree_id = path->btree_id; + as->update_level_start = level_start; + as->update_level_end = level_end; INIT_LIST_HEAD(&as->list); INIT_LIST_HEAD(&as->unwritten_list); INIT_LIST_HEAD(&as->write_blocked_list); @@ -1168,7 +1252,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, */ if (bch2_err_matches(ret, ENOSPC) && (flags & BCH_TRANS_COMMIT_journal_reclaim) && - watermark != BCH_WATERMARK_reclaim) { + watermark < BCH_WATERMARK_reclaim) { ret = -BCH_ERR_journal_reclaim_would_deadlock; goto err; } @@ -1220,23 +1304,29 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) bch2_recalc_btree_reserve(c); } -static void bch2_btree_set_root(struct btree_update *as, - struct btree_trans *trans, - struct btree_path *path, - struct btree *b) +static int bch2_btree_set_root(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + bool nofail) { struct bch_fs *c = as->c; - struct btree *old; trace_and_count(c, btree_node_set_root, trans, b); - old = btree_node_root(c, b); + struct btree *old = btree_node_root(c, b); /* * Ensure no one is using the old root while we switch to the * new root: */ - bch2_btree_node_lock_write_nofail(trans, path, &old->c); + if (nofail) { + bch2_btree_node_lock_write_nofail(trans, path, &old->c); + } else { + int ret = bch2_btree_node_lock_write(trans, path, &old->c); + if (ret) + return ret; + } bch2_btree_set_root_inmem(c, b); @@ -1250,6 +1340,7 @@ static void bch2_btree_set_root(struct btree_update *as, * depend on the new root would have to update the new root. */ bch2_btree_node_unlock_write(trans, path, old); + return 0; } /* Interior node updates: */ @@ -1316,12 +1407,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, } static void -__bch2_btree_insert_keys_interior(struct btree_update *as, - struct btree_trans *trans, - struct btree_path *path, - struct btree *b, - struct btree_node_iter node_iter, - struct keylist *keys) +bch2_btree_insert_keys_interior(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct btree_node_iter node_iter, + struct keylist *keys) { struct bkey_i *insert = bch2_keylist_front(keys); struct bkey_packed *k; @@ -1380,9 +1471,16 @@ static void __btree_split_node(struct btree_update *as, if (bkey_deleted(k)) continue; + uk = bkey_unpack_key(b, k); + + if (b->c.level && + u64s < n1_u64s && + u64s + k->u64s >= n1_u64s && + bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p)) + n1_u64s += k->u64s; + i = u64s >= n1_u64s; u64s += k->u64s; - uk = bkey_unpack_key(b, k); if (!i) n1_pos = uk.p; bch2_bkey_format_add_key(&format[i], &uk); @@ -1441,8 +1539,7 @@ static void __btree_split_node(struct btree_update *as, bch2_verify_btree_nr_keys(n[i]); - if (b->c.level) - btree_node_interior_verify(as->c, n[i]); + BUG_ON(bch2_btree_node_check_topology(trans, n[i])); } } @@ -1471,9 +1568,9 @@ static void btree_split_insert_keys(struct btree_update *as, bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); - btree_node_interior_verify(as->c, b); + BUG_ON(bch2_btree_node_check_topology(trans, b)); } } @@ -1488,9 +1585,14 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, u64 start_time = local_clock(); int ret = 0; + bch2_verify_btree_nr_keys(b); BUG_ON(!parent && (b != btree_node_root(c, b))); BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1)); + ret = bch2_btree_node_check_topology(trans, b); + if (ret) + return ret; + bch2_btree_interior_update_will_free_node(as, b); if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { @@ -1581,15 +1683,16 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, if (parent) { /* Split a non root node */ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys); - if (ret) - goto err; } else if (n3) { - bch2_btree_set_root(as, trans, trans->paths + path, n3); + ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false); } else { /* Root filled up but didn't need to be split */ - bch2_btree_set_root(as, trans, trans->paths + path, n1); + ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false); } + if (ret) + goto err; + if (n3) { bch2_btree_update_get_open_buckets(as, n3); bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0); @@ -1646,27 +1749,6 @@ err: goto out; } -static void -bch2_btree_insert_keys_interior(struct btree_update *as, - struct btree_trans *trans, - struct btree_path *path, - struct btree *b, - struct keylist *keys) -{ - struct btree_path *linked; - unsigned i; - - __bch2_btree_insert_keys_interior(as, trans, path, b, - path->l[b->c.level].iter, keys); - - btree_update_updated_node(as, b); - - trans_for_each_path_with_node(trans, b, linked, i) - bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); - - bch2_trans_verify_paths(trans); -} - /** * bch2_btree_insert_node - insert bkeys into a given btree node * @@ -1687,7 +1769,8 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t struct keylist *keys) { struct bch_fs *c = as->c; - struct btree_path *path = trans->paths + path_idx; + struct btree_path *path = trans->paths + path_idx, *linked; + unsigned i; int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); int old_live_u64s = b->nr.live_u64s; int live_u64s_added, u64s_added; @@ -1710,9 +1793,19 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t goto split; } - btree_node_interior_verify(c, b); + ret = bch2_btree_node_check_topology(trans, b); + if (ret) { + bch2_btree_node_unlock_write(trans, path, b); + return ret; + } - bch2_btree_insert_keys_interior(as, trans, path, b, keys); + bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); + + trans_for_each_path_with_node(trans, b, linked, i) + bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); + + bch2_trans_verify_paths(trans); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; @@ -1726,16 +1819,17 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_maybe_compact_whiteouts(c, b)) bch2_trans_node_reinit_iter(trans, b); + btree_update_updated_node(as, b); bch2_btree_node_unlock_write(trans, path, b); - btree_node_interior_verify(c, b); + BUG_ON(bch2_btree_node_check_topology(trans, b)); return 0; split: /* * We could attempt to avoid the transaction restart, by calling * bch2_btree_path_upgrade() and allocating more nodes: */ - if (b->c.level >= as->update_level) { + if (b->c.level >= as->update_level_end) { trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b); return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race); } @@ -1801,7 +1895,9 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans * bch2_keylist_add(&as->parent_keys, &b->key); btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys); - bch2_btree_set_root(as, trans, path, n); + int ret = bch2_btree_set_root(as, trans, path, n, true); + BUG_ON(ret); + bch2_btree_update_get_open_buckets(as, n); bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); bch2_trans_node_add(trans, path, n); @@ -1818,9 +1914,12 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path, { struct bch_fs *c = trans->c; struct btree *b = bch2_btree_id_root(c, trans->paths[path].btree_id)->b; + + if (btree_node_fake(b)) + return bch2_btree_split_leaf(trans, path, flags); + struct btree_update *as = - bch2_btree_update_start(trans, trans->paths + path, - b->c.level, true, flags); + bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags); if (IS_ERR(as)) return PTR_ERR(as); @@ -1851,6 +1950,18 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, BUG_ON(!trans->paths[path].should_be_locked); BUG_ON(!btree_node_locked(&trans->paths[path], level)); + /* + * Work around a deadlock caused by the btree write buffer not doing + * merges and leaving tons of merges for us to do - we really don't need + * to be doing merges at all from the interior update path, and if the + * interior update path is generating too many new interior updates we + * deadlock: + */ + if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates) + return 0; + + flags &= ~BCH_WATERMARK_MASK; + b = trans->paths[path].l[level].b; if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) || @@ -1996,6 +2107,10 @@ err: bch2_path_put(trans, new_path, true); bch2_path_put(trans, sib_path, true); bch2_trans_verify_locks(trans); + if (ret == -BCH_ERR_journal_reclaim_would_deadlock) + ret = 0; + if (!ret) + ret = bch2_trans_relock(trans); return ret; err_free_update: bch2_btree_node_free_never_used(as, trans, n); @@ -2041,12 +2156,13 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys); - if (ret) - goto err; } else { - bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n); + ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false); } + if (ret) + goto err; + bch2_btree_update_get_open_buckets(as, n); bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); @@ -2391,7 +2507,7 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b) bch2_btree_set_root_inmem(c, b); } -static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id) +static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id id, unsigned level) { struct bch_fs *c = trans->c; struct closure cl; @@ -2410,7 +2526,7 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id) set_btree_node_fake(b); set_btree_node_need_rewrite(b); - b->c.level = 0; + b->c.level = level; b->c.btree_id = id; bkey_btree_ptr_init(&b->key); @@ -2437,9 +2553,23 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id) return 0; } -void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) +void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) { - bch2_trans_run(c, __bch2_btree_root_alloc(trans, id)); + bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level)); +} + +static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) +{ + prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", + (void *) as->ip_started, + bch2_btree_id_str(as->btree_id), + as->update_level_start, + as->update_level_end, + bch2_watermarks[as->watermark], + bch2_btree_update_modes[as->mode], + as->nodes_written, + closure_nr_remaining(&as->cl), + as->journal.seq); } void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) @@ -2448,12 +2578,7 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) mutex_lock(&c->btree_interior_update_lock); list_for_each_entry(as, &c->btree_interior_update_list, list) - prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n", - (void *) as->ip_started, - as->mode, - as->nodes_written, - closure_nr_remaining(&as->cl), - as->journal.seq); + bch2_btree_update_to_text(out, as); mutex_unlock(&c->btree_interior_update_lock); } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index f651dd48aaa0..c1a479ebaad1 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -10,6 +10,20 @@ #define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1)) +int bch2_btree_node_check_topology(struct btree_trans *, struct btree *); + +#define BTREE_UPDATE_MODES() \ + x(none) \ + x(node) \ + x(root) \ + x(update) + +enum btree_update_mode { +#define x(n) BTREE_UPDATE_##n, + BTREE_UPDATE_MODES() +#undef x +}; + /* * Tracks an in progress split/rewrite of a btree node and the update to the * parent node: @@ -37,24 +51,19 @@ struct btree_update { struct list_head list; struct list_head unwritten_list; - /* What kind of update are we doing? */ - enum { - BTREE_INTERIOR_NO_UPDATE, - BTREE_INTERIOR_UPDATING_NODE, - BTREE_INTERIOR_UPDATING_ROOT, - BTREE_INTERIOR_UPDATING_AS, - } mode; - + enum btree_update_mode mode; + enum bch_watermark watermark; unsigned nodes_written:1; unsigned took_gc_lock:1; enum btree_id btree_id; - unsigned update_level; + unsigned update_level_start; + unsigned update_level_end; struct disk_reservation disk_res; /* - * BTREE_INTERIOR_UPDATING_NODE: + * BTREE_UPDATE_node: * The update that made the new nodes visible was a regular update to an * existing interior node - @b. We can't write out the update to @b * until the new nodes we created are finished writing, so we block @b @@ -163,7 +172,7 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *, struct bkey_i *, unsigned, bool); void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); -void bch2_btree_root_alloc(struct bch_fs *, enum btree_id); +void bch2_btree_root_alloc_fake(struct bch_fs *, enum btree_id, unsigned); static inline unsigned btree_update_reserve_required(struct bch_fs *c, struct btree *b) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 5cbad8445782..36a6f42aba5e 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -11,6 +11,7 @@ #include "journal_reclaim.h" #include +#include static int bch2_btree_write_buffer_journal_flush(struct journal *, struct journal_entry_pin *, u64); @@ -46,6 +47,14 @@ static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_ke #endif } +static int wb_key_seq_cmp(const void *_l, const void *_r) +{ + const struct btree_write_buffered_key *l = _l; + const struct btree_write_buffered_key *r = _r; + + return cmp_int(l->journal_seq, r->journal_seq); +} + /* Compare excluding idx, the low 24 bits: */ static inline bool wb_key_eq(const void *_l, const void *_r) { @@ -307,6 +316,16 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) { bch2_btree_node_unlock_write(trans, path, path->l[0].b); write_locked = false; + + ret = lockrestart_do(trans, + bch2_btree_iter_traverse(&iter) ?: + bch2_foreground_maybe_merge(trans, iter.path, 0, + BCH_WATERMARK_reclaim| + BCH_TRANS_COMMIT_journal_reclaim| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc)); + if (ret) + goto err; } } @@ -357,6 +376,11 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) */ trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr); + sort(wb->flushing.keys.data, + wb->flushing.keys.nr, + sizeof(wb->flushing.keys.data[0]), + wb_key_seq_cmp, NULL); + darray_for_each(wb->flushing.keys, i) { if (!i->journal_seq) continue; @@ -368,10 +392,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) ret = commit_do(trans, NULL, NULL, BCH_WATERMARK_reclaim| + BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_no_journal_res| - BCH_TRANS_COMMIT_journal_reclaim, + BCH_TRANS_COMMIT_no_journal_res , btree_write_buffered_insert(trans, i)); if (ret) goto err; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 96edf2c34d43..941401a210f5 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -525,6 +525,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, "different types of data in same bucket: %s, %s", bch2_data_type_str(g->data_type), bch2_data_type_str(data_type))) { + BUG(); ret = -EIO; goto err; } @@ -628,6 +629,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans, bch2_data_type_str(ptr_data_type), (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + BUG(); ret = -EIO; goto err; } @@ -815,14 +817,14 @@ static int __mark_pointer(struct btree_trans *trans, static int bch2_trigger_pointer(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, struct extent_ptr_decoded p, - s64 *sectors, - unsigned flags) + const union bch_extent_entry *entry, + s64 *sectors, unsigned flags) { bool insert = !(flags & BTREE_TRIGGER_OVERWRITE); struct bpos bucket; struct bch_backpointer bp; - bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp); + bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, entry, &bucket, &bp); *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len); if (flags & BTREE_TRIGGER_TRANSACTIONAL) { @@ -851,7 +853,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, if (flags & BTREE_TRIGGER_GC) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); - enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); percpu_down_read(&c->mark_lock); struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); @@ -979,7 +981,7 @@ static int __trigger_extent(struct btree_trans *trans, bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { s64 disk_sectors; - ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags); + ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags); if (ret < 0) return ret; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 6387e039f789..f9af5adabe83 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma fallthrough; case BCH_WATERMARK_btree_copygc: case BCH_WATERMARK_reclaim: + case BCH_WATERMARK_interior_updates: break; } @@ -394,14 +395,6 @@ static inline const char *bch2_data_type_str(enum bch_data_type type) : "(invalid data type)"; } -static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type) -{ - if (type < BCH_DATA_NR) - prt_str(out, __bch2_data_types[type]); - else - prt_printf(out, "(invalid data type %u)", type); -} - /* disk reservations: */ static inline void bch2_disk_reservation_put(struct bch_fs *c, diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 38defa19d52d..72781aad6ba7 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -7,7 +7,7 @@ #include "chardev.h" #include "journal.h" #include "move.h" -#include "recovery.h" +#include "recovery_passes.h" #include "replicas.h" #include "super.h" #include "super-io.h" @@ -134,42 +134,38 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg struct fsck_thread { struct thread_with_stdio thr; struct bch_fs *c; - char **devs; - size_t nr_devs; struct bch_opts opts; }; static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) { struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); - if (thr->devs) - for (size_t i = 0; i < thr->nr_devs; i++) - kfree(thr->devs[i]); - kfree(thr->devs); kfree(thr); } static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) { struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); - struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts); + struct bch_fs *c = thr->c; - if (IS_ERR(c)) - return PTR_ERR(c); + int ret = PTR_ERR_OR_ZERO(c); + if (ret) + return ret; - int ret = 0; - if (test_bit(BCH_FS_errors_fixed, &c->flags)) - ret |= 1; - if (test_bit(BCH_FS_error, &c->flags)) - ret |= 4; + ret = bch2_fs_start(thr->c); + if (ret) + goto err; - bch2_fs_stop(c); - - if (ret & 1) + if (test_bit(BCH_FS_errors_fixed, &c->flags)) { bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name); - if (ret & 4) + ret |= 1; + } + if (test_bit(BCH_FS_error, &c->flags)) { bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name); - + ret |= 4; + } +err: + bch2_fs_stop(c); return ret; } @@ -182,7 +178,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a { struct bch_ioctl_fsck_offline arg; struct fsck_thread *thr = NULL; - u64 *devs = NULL; + darray_str(devs) = {}; long ret = 0; if (copy_from_user(&arg, user_arg, sizeof(arg))) @@ -194,28 +190,31 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) || - !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) || - !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) { + for (size_t i = 0; i < arg.nr_devs; i++) { + u64 dev_u64; + ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64)); + if (ret) + goto err; + + char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX); + ret = PTR_ERR_OR_ZERO(dev_str); + if (ret) + goto err; + + ret = darray_push(&devs, dev_str); + if (ret) { + kfree(dev_str); + goto err; + } + } + + thr = kzalloc(sizeof(*thr), GFP_KERNEL); + if (!thr) { ret = -ENOMEM; goto err; } thr->opts = bch2_opts_empty(); - thr->nr_devs = arg.nr_devs; - - if (copy_from_user(devs, &user_arg->devs[0], - array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) { - ret = -EINVAL; - goto err; - } - - for (size_t i = 0; i < arg.nr_devs; i++) { - thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX); - ret = PTR_ERR_OR_ZERO(thr->devs[i]); - if (ret) - goto err; - } if (arg.opts) { char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); @@ -230,15 +229,26 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); + /* We need request_key() to be called before we punt to kthread: */ + opt_set(thr->opts, nostart, true); + + thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); + + if (!IS_ERR(thr->c) && + thr->c->opts.errors == BCH_ON_ERROR_panic) + thr->c->opts.errors = BCH_ON_ERROR_ro; + ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops); -err: - if (ret < 0) { - if (thr) - bch2_fsck_thread_exit(&thr->thr); - pr_err("ret %s", bch2_err_str(ret)); - } - kfree(devs); +out: + darray_for_each(devs, i) + kfree(*i); + darray_exit(&devs); return ret; +err: + if (thr) + bch2_fsck_thread_exit(&thr->thr); + pr_err("ret %s", bch2_err_str(ret)); + goto out; } static long bch2_global_ioctl(unsigned cmd, void __user *arg) diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 4701457f6381..7ed779b411f6 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -429,15 +429,20 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, extent_nonce(version, crc_old), bio); if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) { - bch_err(c, "checksum error in %s() (memory corruption or bug?)\n" - "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)", - __func__, - crc_old.csum.hi, - crc_old.csum.lo, - merged.hi, - merged.lo, - bch2_csum_types[crc_old.csum_type], - bch2_csum_types[new_csum_type]); + struct printbuf buf = PRINTBUF; + prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n" + "expected %0llx:%0llx got %0llx:%0llx (old type ", + __func__, + crc_old.csum.hi, + crc_old.csum.lo, + merged.hi, + merged.lo); + bch2_prt_csum_type(&buf, crc_old.csum_type); + prt_str(&buf, " new type "); + bch2_prt_csum_type(&buf, new_csum_type); + prt_str(&buf, ")"); + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); return -EIO; } diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index 1b8c2c1016dc..e40499fde9a4 100644 --- a/fs/bcachefs/checksum.h +++ b/fs/bcachefs/checksum.h @@ -61,11 +61,12 @@ static inline void bch2_csum_err_msg(struct printbuf *out, struct bch_csum expected, struct bch_csum got) { - prt_printf(out, "checksum error: got "); + prt_str(out, "checksum error, type "); + bch2_prt_csum_type(out, type); + prt_str(out, ": got "); bch2_csum_to_text(out, type, got); prt_str(out, " should be "); bch2_csum_to_text(out, type, expected); - prt_printf(out, " type %s", bch2_csum_types[type]); } int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index 58c2eb45570f..607fd5e232c9 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -47,14 +47,6 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; } -static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type) -{ - if (type < BCH_COMPRESSION_TYPE_NR) - prt_str(out, __bch2_compression_types[type]); - else - prt_printf(out, "(invalid compression type %u)", type); -} - int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, struct bch_extent_crc_unpacked *); int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 4150feca42a2..0022b51ce3c0 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -14,6 +14,7 @@ #include "move.h" #include "nocow_locking.h" #include "rebalance.h" +#include "snapshot.h" #include "subvolume.h" #include "trace.h" @@ -509,6 +510,14 @@ int bch2_data_update_init(struct btree_trans *trans, unsigned ptrs_locked = 0; int ret = 0; + /* + * fs is corrupt we have a key for a snapshot node that doesn't exist, + * and we have to check for this because we go rw before repairing the + * snapshots table - just skip it, we can move it later. + */ + if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) + return -BCH_ERR_data_update_done; + bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; @@ -571,8 +580,7 @@ int bch2_data_update_init(struct btree_trans *trans, move_ctxt_wait_event(ctxt, (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, PTR_BUCKET_POS(c, &p.ptr), 0)) || - (!atomic_read(&ctxt->read_sectors) && - !atomic_read(&ctxt->write_sectors))); + list_empty(&ctxt->ios)); if (!locked) bch2_bucket_nocow_lock(&c->nocow_locks, @@ -590,6 +598,8 @@ int bch2_data_update_init(struct btree_trans *trans, i++; } + unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have)); + /* * If current extent durability is less than io_opts.data_replicas, * we're not trying to rereplicate the extent up to data_replicas here - @@ -599,7 +609,7 @@ int bch2_data_update_init(struct btree_trans *trans, * rereplicate, currently, so that users don't get an unexpected -ENOSPC */ if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) && - durability_have >= io_opts.data_replicas) { + !durability_required) { m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; m->data_opts.rewrite_ptrs = 0; /* if iter == NULL, it's just a promote */ @@ -608,11 +618,18 @@ int bch2_data_update_init(struct btree_trans *trans, goto done; } - m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) + + m->op.nr_replicas = min(durability_removing, durability_required) + m->data_opts.extra_replicas; - m->op.nr_replicas_required = m->op.nr_replicas; - BUG_ON(!m->op.nr_replicas); + /* + * If device(s) were set to durability=0 after data was written to them + * we can end up with a duribilty=0 extent, and the normal algorithm + * that tries not to increase durability doesn't work: + */ + if (!(durability_have + durability_removing)) + m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); + + m->op.nr_replicas_required = m->op.nr_replicas; if (reserve_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 208ce6f0fc43..cd99b7399414 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -13,6 +13,7 @@ #include "btree_iter.h" #include "btree_locking.h" #include "btree_update.h" +#include "btree_update_interior.h" #include "buckets.h" #include "debug.h" #include "error.h" @@ -668,7 +669,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, i->size = size; i->ret = 0; - do { + while (1) { err = flush_buf(i); if (err) return err; @@ -676,9 +677,12 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, if (!i->size) break; + if (done) + break; + done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter); i->iter++; - } while (!done); + } if (i->buf.allocation_failure) return -ENOMEM; @@ -693,13 +697,45 @@ static const struct file_operations journal_pins_ops = { .read = bch2_journal_pins_read, }; +static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + if (!i->iter) { + bch2_btree_updates_to_text(&i->buf, c); + i->iter++; + } + + err = flush_buf(i); + if (err) + return err; + + if (i->buf.allocation_failure) + return -ENOMEM; + + return i->ret; +} + +static const struct file_operations btree_updates_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_btree_updates_read, +}; + static int btree_transaction_stats_open(struct inode *inode, struct file *file) { struct bch_fs *c = inode->i_private; struct dump_iter *i; i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); - if (!i) return -ENOMEM; @@ -866,6 +902,20 @@ void bch2_fs_debug_exit(struct bch_fs *c) debugfs_remove_recursive(c->fs_debug_dir); } +static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd) +{ + struct dentry *d; + + d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir); + + debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops); + + debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops); + + debugfs_create_file("bfloat-failed", 0400, d, bd, + &bfloat_failed_debug_ops); +} + void bch2_fs_debug_init(struct bch_fs *c) { struct btree_debug *bd; @@ -888,6 +938,9 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, c->btree_debug, &journal_pins_ops); + debugfs_create_file("btree_updates", 0400, c->fs_debug_dir, + c->btree_debug, &btree_updates_ops); + debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, c, &btree_transaction_stats_op); @@ -902,21 +955,7 @@ void bch2_fs_debug_init(struct bch_fs *c) bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); bd++) { bd->id = bd - c->btree_debug; - debugfs_create_file(bch2_btree_id_str(bd->id), - 0400, c->btree_debug_dir, bd, - &btree_debug_ops); - - snprintf(name, sizeof(name), "%s-formats", - bch2_btree_id_str(bd->id)); - - debugfs_create_file(name, 0400, c->btree_debug_dir, bd, - &btree_format_debug_ops); - - snprintf(name, sizeof(name), "%s-bfloat-failed", - bch2_btree_id_str(bd->id)); - - debugfs_create_file(name, 0400, c->btree_debug_dir, bd, - &bfloat_failed_debug_ops); + bch2_fs_debug_btree_init(c, bd); } } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 082075244e16..556a217108d3 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -131,29 +131,33 @@ fsck_err: void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - unsigned i, nr_data = s->nr_blocks - s->nr_redundant; + const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v; + struct bch_stripe s = {}; - prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", - s->algorithm, - le16_to_cpu(s->sectors), - nr_data, - s->nr_redundant, - s->csum_type, - 1U << s->csum_granularity_bits); + memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k))); - for (i = 0; i < s->nr_blocks; i++) { - const struct bch_extent_ptr *ptr = s->ptrs + i; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - u32 offset; - u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); + unsigned nr_data = s.nr_blocks - s.nr_redundant; - prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset); - if (i < nr_data) - prt_printf(out, "#%u", stripe_blockcount_get(s, i)); - prt_printf(out, " gen %u", ptr->gen); - if (ptr_stale(ca, ptr)) - prt_printf(out, " stale"); + prt_printf(out, "algo %u sectors %u blocks %u:%u csum ", + s.algorithm, + le16_to_cpu(s.sectors), + nr_data, + s.nr_redundant); + bch2_prt_csum_type(out, s.csum_type); + prt_printf(out, " gran %u", 1U << s.csum_granularity_bits); + + for (unsigned i = 0; i < s.nr_blocks; i++) { + const struct bch_extent_ptr *ptr = sp->ptrs + i; + + if ((void *) ptr >= bkey_val_end(k)) + break; + + bch2_extent_ptr_to_text(out, c, ptr); + + if (s.csum_type < BCH_CSUM_NR && + i < nr_data && + stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k)) + prt_printf(out, "#%u", stripe_blockcount_get(sp, i)); } } @@ -607,10 +611,8 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct printbuf err = PRINTBUF; struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev); - prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n", - want.hi, want.lo, - got.hi, got.lo, - bch2_csum_types[v->csum_type]); + prt_str(&err, "stripe "); + bch2_csum_err_msg(&err, v->csum_type, want, got); prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); bch_err_ratelimited(ca, "%s", err.buf); diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index f4369b02e805..f042616888b0 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -32,6 +32,8 @@ static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) static inline unsigned stripe_csum_offset(const struct bch_stripe *s, unsigned dev, unsigned csum_idx) { + EBUG_ON(s->csum_type >= BCH_CSUM_NR); + unsigned csum_bytes = bch_crc_bytes[s->csum_type]; return sizeof(struct bch_stripe) + diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index af25d8ec60f2..01a79fa3eacb 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -252,7 +252,8 @@ x(BCH_ERR_nopromote, nopromote_in_flight) \ x(BCH_ERR_nopromote, nopromote_no_writes) \ x(BCH_ERR_nopromote, nopromote_enomem) \ - x(0, need_inode_lock) + x(0, need_inode_lock) \ + x(0, invalid_snapshot_node) enum bch_errcode { BCH_ERR_START = 2048, diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 043431206799..82a6656c941c 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "error.h" -#include "recovery.h" +#include "journal.h" +#include "recovery_passes.h" #include "super.h" #include "thread_with_file.h" @@ -16,7 +17,8 @@ bool bch2_inconsistent_error(struct bch_fs *c) return false; case BCH_ON_ERROR_ro: if (bch2_fs_emergency_read_only(c)) - bch_err(c, "inconsistency detected - emergency read only"); + bch_err(c, "inconsistency detected - emergency read only at journal seq %llu", + journal_cur_seq(&c->journal)); return true; case BCH_ON_ERROR_panic: panic(bch2_fmt(c, "panic after error")); diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index ae1d6674c512..36caedf72d89 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -32,6 +32,12 @@ bool bch2_inconsistent_error(struct bch_fs *); int bch2_topology_error(struct bch_fs *); +#define bch2_fs_topology_error(c, ...) \ +({ \ + bch_err(c, "btree topology error: " __VA_ARGS__); \ + bch2_topology_error(c); \ +}) + #define bch2_fs_inconsistent(c, ...) \ ({ \ bch_err(c, __VA_ARGS__); \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 61395b113df9..1a331e539204 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -189,13 +189,18 @@ int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { + struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err, - btree_ptr_v2_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, + c, err, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); + bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), + c, err, btree_ptr_v2_min_key_bad, + "min_key > key"); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); fsck_err: return ret; @@ -973,6 +978,33 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) return bkey_deleted(k.k); } +void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr) +{ + struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] + ? bch_dev_bkey_exists(c, ptr->dev) + : NULL; + + if (!ca) { + prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev, + (u64) ptr->offset, ptr->gen, + ptr->cached ? " cached" : ""); + } else { + u32 offset; + u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); + + prt_printf(out, "ptr: %u:%llu:%u gen %u", + ptr->dev, b, offset, ptr->gen); + if (ptr->cached) + prt_str(out, " cached"); + if (ptr->unwritten) + prt_str(out, " unwritten"); + if (b >= ca->mi.first_bucket && + b < ca->mi.nbuckets && + ptr_stale(ca, ptr)) + prt_printf(out, " stale"); + } +} + void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { @@ -988,42 +1020,22 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " "); switch (__extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: { - const struct bch_extent_ptr *ptr = entry_to_ptr(entry); - struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] - ? bch_dev_bkey_exists(c, ptr->dev) - : NULL; - - if (!ca) { - prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev, - (u64) ptr->offset, ptr->gen, - ptr->cached ? " cached" : ""); - } else { - u32 offset; - u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); - - prt_printf(out, "ptr: %u:%llu:%u gen %u", - ptr->dev, b, offset, ptr->gen); - if (ptr->cached) - prt_str(out, " cached"); - if (ptr->unwritten) - prt_str(out, " unwritten"); - if (ca && ptr_stale(ca, ptr)) - prt_printf(out, " stale"); - } + case BCH_EXTENT_ENTRY_ptr: + bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry)); break; - } + case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc64: case BCH_EXTENT_ENTRY_crc128: { struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ", + prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ", crc.compressed_size, crc.uncompressed_size, - crc.offset, crc.nonce, - bch2_csum_types[crc.csum_type]); + crc.offset, crc.nonce); + bch2_prt_csum_type(out, crc.csum_type); + prt_str(out, " compress "); bch2_prt_compression_type(out, crc.compression_type); break; } diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index fd2669cdd76f..528e817eacbd 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -596,30 +596,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) return ret; } -static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr) -{ - switch (k.k->type) { - case KEY_TYPE_btree_ptr: - case KEY_TYPE_btree_ptr_v2: - return BCH_DATA_btree; - case KEY_TYPE_extent: - case KEY_TYPE_reflink_v: - return BCH_DATA_user; - case KEY_TYPE_stripe: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - - BUG_ON(ptr < s.v->ptrs || - ptr >= s.v->ptrs + s.v->nr_blocks); - - return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant - ? BCH_DATA_parity - : BCH_DATA_user; - } - default: - BUG(); - } -} - unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); @@ -700,6 +676,7 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s); void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *); bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); +void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c new file mode 100644 index 000000000000..0f955c3c76a7 --- /dev/null +++ b/fs/bcachefs/eytzinger.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "eytzinger.h" + +/** + * is_aligned - is this pointer & size okay for word-wide copying? + * @base: pointer to data + * @size: size of each element + * @align: required alignment (typically 4 or 8) + * + * Returns true if elements can be copied using word loads and stores. + * The size must be a multiple of the alignment, and the base address must + * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + * + * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)" + * to "if ((a | b) & mask)", so we do that by hand. + */ +__attribute_const__ __always_inline +static bool is_aligned(const void *base, size_t size, unsigned char align) +{ + unsigned char lsbits = (unsigned char)size; + + (void)base; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + lsbits |= (unsigned char)(uintptr_t)base; +#endif + return (lsbits & (align - 1)) == 0; +} + +/** + * swap_words_32 - swap two elements in 32-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 4) + * + * Exchange the two objects in memory. This exploits base+index addressing, + * which basically all CPUs have, to minimize loop overhead computations. + * + * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the + * bottom of the loop, even though the zero flag is still valid from the + * subtract (since the intervening mov instructions don't alter the flags). + * Gcc 8.1.0 doesn't have that problem. + */ +static void swap_words_32(void *a, void *b, size_t n) +{ + do { + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + } while (n); +} + +/** + * swap_words_64 - swap two elements in 64-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 8) + * + * Exchange the two objects in memory. This exploits base+index + * addressing, which basically all CPUs have, to minimize loop overhead + * computations. + * + * We'd like to use 64-bit loads if possible. If they're not, emulating + * one requires base+index+4 addressing which x86 has but most other + * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads, + * but it's possible to have 64-bit loads without 64-bit pointers (e.g. + * x32 ABI). Are there any cases the kernel needs to worry about? + */ +static void swap_words_64(void *a, void *b, size_t n) +{ + do { +#ifdef CONFIG_64BIT + u64 t = *(u64 *)(a + (n -= 8)); + *(u64 *)(a + n) = *(u64 *)(b + n); + *(u64 *)(b + n) = t; +#else + /* Use two 32-bit transfers to avoid base+index+4 addressing */ + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + + t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; +#endif + } while (n); +} + +/** + * swap_bytes - swap two elements a byte at a time + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size + * + * This is the fallback if alignment doesn't allow using larger chunks. + */ +static void swap_bytes(void *a, void *b, size_t n) +{ + do { + char t = ((char *)a)[--n]; + ((char *)a)[n] = ((char *)b)[n]; + ((char *)b)[n] = t; + } while (n); +} + +/* + * The values are arbitrary as long as they can't be confused with + * a pointer, but small integers make for the smallest compare + * instructions. + */ +#define SWAP_WORDS_64 (swap_r_func_t)0 +#define SWAP_WORDS_32 (swap_r_func_t)1 +#define SWAP_BYTES (swap_r_func_t)2 +#define SWAP_WRAPPER (swap_r_func_t)3 + +struct wrapper { + cmp_func_t cmp; + swap_func_t swap_func; +}; + +/* + * The function pointer is last to make tail calls most efficient if the + * compiler decides not to inline this function. + */ +static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv) +{ + if (swap_func == SWAP_WRAPPER) { + ((const struct wrapper *)priv)->swap_func(a, b, (int)size); + return; + } + + if (swap_func == SWAP_WORDS_64) + swap_words_64(a, b, size); + else if (swap_func == SWAP_WORDS_32) + swap_words_32(a, b, size); + else if (swap_func == SWAP_BYTES) + swap_bytes(a, b, size); + else + swap_func(a, b, (int)size, priv); +} + +#define _CMP_WRAPPER ((cmp_r_func_t)0L) + +static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv) +{ + if (cmp == _CMP_WRAPPER) + return ((const struct wrapper *)priv)->cmp(a, b); + return cmp(a, b, priv); +} + +static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size, + cmp_r_func_t cmp_func, const void *priv, + size_t l, size_t r) +{ + return do_cmp(base + inorder_to_eytzinger0(l, n) * size, + base + inorder_to_eytzinger0(r, n) * size, + cmp_func, priv); +} + +static inline void eytzinger0_do_swap(void *base, size_t n, size_t size, + swap_r_func_t swap_func, const void *priv, + size_t l, size_t r) +{ + do_swap(base + inorder_to_eytzinger0(l, n) * size, + base + inorder_to_eytzinger0(r, n) * size, + size, swap_func, priv); +} + +void eytzinger0_sort_r(void *base, size_t n, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv) +{ + int i, c, r; + + /* called from 'sort' without swap function, let's pick the default */ + if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func) + swap_func = NULL; + + if (!swap_func) { + if (is_aligned(base, size, 8)) + swap_func = SWAP_WORDS_64; + else if (is_aligned(base, size, 4)) + swap_func = SWAP_WORDS_32; + else + swap_func = SWAP_BYTES; + } + + /* heapify */ + for (i = n / 2 - 1; i >= 0; --i) { + for (r = i; r * 2 + 1 < n; r = c) { + c = r * 2 + 1; + + if (c + 1 < n && + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) + c++; + + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) + break; + + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); + } + } + + /* sort */ + for (i = n - 1; i > 0; --i) { + eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i); + + for (r = 0; r * 2 + 1 < i; r = c) { + c = r * 2 + 1; + + if (c + 1 < i && + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) + c++; + + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) + break; + + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); + } + } +} + +void eytzinger0_sort(void *base, size_t n, size_t size, + cmp_func_t cmp_func, + swap_func_t swap_func) +{ + struct wrapper w = { + .cmp = cmp_func, + .swap_func = swap_func, + }; + + return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); +} diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h index b04750dbf870..24840aee335c 100644 --- a/fs/bcachefs/eytzinger.h +++ b/fs/bcachefs/eytzinger.h @@ -5,23 +5,33 @@ #include #include -#include "util.h" +#ifdef EYTZINGER_DEBUG +#define EYTZINGER_BUG_ON(cond) BUG_ON(cond) +#else +#define EYTZINGER_BUG_ON(cond) +#endif /* * Traversal for trees in eytzinger layout - a full binary tree layed out in an - * array - */ - -/* - * One based indexing version: + * array. * - * With one based indexing each level of the tree starts at a power of two - - * good for cacheline alignment: + * Consider using an eytzinger tree any time you would otherwise be doing binary + * search over an array. Binary search is a worst case scenario for branch + * prediction and prefetching, but in an eytzinger tree every node's children + * are adjacent in memory, thus we can prefetch children before knowing the + * result of the comparison, assuming multiple nodes fit on a cacheline. + * + * Two variants are provided, for one based indexing and zero based indexing. + * + * Zero based indexing is more convenient, but one based indexing has better + * alignment and thus better performance because each new level of the tree + * starts at a power of two, and thus if element 0 was cacheline aligned, each + * new level will be as well. */ static inline unsigned eytzinger1_child(unsigned i, unsigned child) { - EBUG_ON(child > 1); + EYTZINGER_BUG_ON(child > 1); return (i << 1) + child; } @@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size) static inline unsigned eytzinger1_next(unsigned i, unsigned size) { - EBUG_ON(i > size); + EYTZINGER_BUG_ON(i > size); if (eytzinger1_right_child(i) <= size) { i = eytzinger1_right_child(i); @@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size) static inline unsigned eytzinger1_prev(unsigned i, unsigned size) { - EBUG_ON(i > size); + EYTZINGER_BUG_ON(i > size); if (eytzinger1_left_child(i) <= size) { i = eytzinger1_left_child(i) + 1; @@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, unsigned shift = __fls(size) - b; int s; - EBUG_ON(!i || i > size); + EYTZINGER_BUG_ON(!i || i > size); i ^= 1U << b; i <<= 1; @@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, unsigned shift; int s; - EBUG_ON(!i || i > size); + EYTZINGER_BUG_ON(!i || i > size); /* * sign bit trick: @@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) static inline unsigned eytzinger0_child(unsigned i, unsigned child) { - EBUG_ON(child > 1); + EYTZINGER_BUG_ON(child > 1); return (i << 1) + 1 + child; } @@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) (_i) != -1; \ (_i) = eytzinger0_next((_i), (_size))) -typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); - /* return greatest node <= @search, or -1 if not found */ -static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, - eytzinger_cmp_fn cmp, const void *search) +static inline int eytzinger0_find_le(void *base, size_t nr, size_t size, + cmp_func_t cmp, const void *search) { unsigned i, n = 0; @@ -244,21 +252,38 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, do { i = n; - n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); + n = eytzinger0_child(i, cmp(base + i * size, search) <= 0); } while (n < nr); if (n & 1) { - /* @i was greater than @search, return previous node: */ - - if (i == eytzinger0_first(nr)) - return -1; - + /* + * @i was greater than @search, return previous node: + * + * if @i was leftmost/smallest element, + * eytzinger0_prev(eytzinger0_first())) returns -1, as expected + */ return eytzinger0_prev(i, nr); } else { return i; } } +static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size, + cmp_func_t cmp, const void *search) +{ + ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search); + + /* + * if eytitzinger0_find_le() returned -1 - no element was <= search - we + * want to return the first element; next/prev identities mean this work + * as expected + * + * similarly if find_le() returns last element, we should return -1; + * identities mean this all works out: + */ + return eytzinger0_next(idx, nr); +} + #define eytzinger0_find(base, nr, size, _cmp, search) \ ({ \ void *_base = (base); \ @@ -269,13 +294,13 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, int _res; \ \ while (_i < _nr && \ - (_res = _cmp(_search, _base + _i * _size, _size))) \ + (_res = _cmp(_search, _base + _i * _size))) \ _i = eytzinger0_child(_i, _res > 0); \ _i; \ }) -void eytzinger0_sort(void *, size_t, size_t, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t)); +void eytzinger0_sort_r(void *, size_t, size_t, + cmp_r_func_t, swap_r_func_t, const void *); +void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); #endif /* _EYTZINGER_H */ diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 33cb6da3a5ad..b889370a5088 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -387,6 +387,8 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio) ret = dio->op.error ?: ((long) dio->written << 9); bio_put(&dio->op.wbio.bio); + bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write); + /* inode->i_dio_count is our ref on inode and thus bch_fs */ inode_dio_end(&inode->v); @@ -536,7 +538,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio) if (likely(!dio->iter.count) || dio->op.error) break; - bio_reset(bio, NULL, REQ_OP_WRITE); + bio_reset(bio, NULL, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE); } out: return bch2_dio_write_done(dio); @@ -590,22 +592,25 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) prefetch(&inode->ei_inode); prefetch((void *) &inode->ei_inode + 64); + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write)) + return -EROFS; + inode_lock(&inode->v); ret = generic_write_checks(req, iter); if (unlikely(ret <= 0)) - goto err; + goto err_put_write_ref; ret = file_remove_privs(file); if (unlikely(ret)) - goto err; + goto err_put_write_ref; ret = file_update_time(file); if (unlikely(ret)) - goto err; + goto err_put_write_ref; if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) - goto err; + goto err_put_write_ref; inode_dio_begin(&inode->v); bch2_pagecache_block_get(inode); @@ -618,7 +623,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) bio = bio_alloc_bioset(NULL, bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), - REQ_OP_WRITE, + REQ_OP_WRITE | REQ_SYNC | REQ_IDLE, GFP_KERNEL, &c->dio_write_bioset); dio = container_of(bio, struct dio_write, op.wbio.bio); @@ -645,7 +650,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) } ret = bch2_dio_write_loop(dio); -err: +out: if (locked) inode_unlock(&inode->v); return ret; @@ -653,7 +658,9 @@ err_put_bio: bch2_pagecache_block_put(inode); bio_put(bio); inode_dio_end(&inode->v); - goto err; +err_put_write_ref: + bch2_write_ref_put(c, BCH_WRITE_REF_dio_write); + goto out; } void bch2_fs_fs_io_direct_exit(struct bch_fs *c) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 8c70123b6a0c..20b40477425f 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -174,18 +174,18 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, static int bch2_flush_inode(struct bch_fs *c, struct bch_inode_info *inode) { - struct bch_inode_unpacked u; - int ret; - if (c->opts.journal_flush_disabled) return 0; - ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u); - if (ret) - return ret; + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) + return -EROFS; - return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: - bch2_inode_flush_nocow_writes(c, inode); + struct bch_inode_unpacked u; + int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?: + bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: + bch2_inode_flush_nocow_writes(c, inode); + bch2_write_ref_put(c, BCH_WRITE_REF_fsync); + return ret; } int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0ccee05f6887..b5ea9fa1259d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1997,6 +1997,7 @@ out: return dget(sb->s_root); err_put_super: + __bch2_fs_stop(c); deactivate_locked_super(sb); return ERR_PTR(bch2_err_class(ret)); } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 47d4eefaba7b..8e2010212cc3 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -12,7 +12,7 @@ #include "fsck.h" #include "inode.h" #include "keylist.h" -#include "recovery.h" +#include "recovery_passes.h" #include "snapshot.h" #include "super.h" #include "xattr.h" @@ -63,9 +63,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol, u32 *snapshot, u64 *inum) { struct bch_subvolume s; - int ret; - - ret = bch2_subvolume_get(trans, subvol, false, 0, &s); + int ret = bch2_subvolume_get(trans, subvol, false, 0, &s); *snapshot = le32_to_cpu(s.snapshot); *inum = le64_to_cpu(s.inode); @@ -158,9 +156,10 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); - ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, - &dir_hash_info, &iter, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_hash_delete_at(trans, bch2_dirent_hash_desc, + &dir_hash_info, &iter, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); bch2_trans_iter_exit(trans, &iter); err: bch_err_fn(c, ret); @@ -169,7 +168,8 @@ err: /* Get lost+found, create if it doesn't exist: */ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - struct bch_inode_unpacked *lostfound) + struct bch_inode_unpacked *lostfound, + u64 reattaching_inum) { struct bch_fs *c = trans->c; struct qstr lostfound_str = QSTR("lost+found"); @@ -184,19 +184,36 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, return ret; subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; - u32 subvol_snapshot; - ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol), - &subvol_snapshot, &root_inum.inum); - bch_err_msg(c, ret, "looking up root subvol"); + struct bch_subvolume subvol; + ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), + false, 0, &subvol); + bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u", + le32_to_cpu(st.master_subvol), snapshot); if (ret) return ret; + if (!subvol.inode) { + struct btree_iter iter; + struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)), + 0, subvolume); + ret = PTR_ERR_OR_ZERO(subvol); + if (ret) + return ret; + + subvol->v.inode = cpu_to_le64(reattaching_inum); + bch2_trans_iter_exit(trans, &iter); + } + + root_inum.inum = le64_to_cpu(subvol.inode); + struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; u32 root_inode_snapshot = snapshot; ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); - bch_err_msg(c, ret, "looking up root inode"); + bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", + root_inum.inum, le32_to_cpu(st.master_subvol)); if (ret) return ret; @@ -292,7 +309,7 @@ static int reattach_inode(struct btree_trans *trans, snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); } - ret = lookup_lostfound(trans, dirent_snapshot, &lostfound); + ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum); if (ret) return ret; @@ -363,6 +380,112 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume return ret; } +static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum) +{ + struct bch_fs *c = trans->c; + + if (!bch2_snapshot_is_leaf(c, snapshotid)) { + bch_err(c, "need to reconstruct subvol, but have interior node snapshot"); + return -BCH_ERR_fsck_repair_unimplemented; + } + + /* + * If inum isn't set, that means we're being called from check_dirents, + * not check_inodes - the root of this subvolume doesn't exist or we + * would have found it there: + */ + if (!inum) { + struct btree_iter inode_iter = {}; + struct bch_inode_unpacked new_inode; + u64 cpu = raw_smp_processor_id(); + + bch2_inode_init_early(c, &new_inode); + bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL); + + new_inode.bi_subvol = subvolid; + + int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: + bch2_btree_iter_traverse(&inode_iter) ?: + bch2_inode_write(trans, &inode_iter, &new_inode); + bch2_trans_iter_exit(trans, &inode_iter); + if (ret) + return ret; + + inum = new_inode.bi_inum; + } + + bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum); + + struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); + int ret = PTR_ERR_OR_ZERO(new_subvol); + if (ret) + return ret; + + bkey_subvolume_init(&new_subvol->k_i); + new_subvol->k.p.offset = subvolid; + new_subvol->v.snapshot = cpu_to_le32(snapshotid); + new_subvol->v.inode = cpu_to_le64(inum); + ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0); + if (ret) + return ret; + + struct btree_iter iter; + struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_snapshots, POS(0, snapshotid), + 0, snapshot); + ret = PTR_ERR_OR_ZERO(s); + bch_err_msg(c, ret, "getting snapshot %u", snapshotid); + if (ret) + return ret; + + u32 snapshot_tree = le32_to_cpu(s->v.tree); + + s->v.subvol = cpu_to_le32(subvolid); + SET_BCH_SNAPSHOT_SUBVOL(&s->v, true); + bch2_trans_iter_exit(trans, &iter); + + struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_snapshot_trees, POS(0, snapshot_tree), + 0, snapshot_tree); + ret = PTR_ERR_OR_ZERO(st); + bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree); + if (ret) + return ret; + + if (!st->v.master_subvol) + st->v.master_subvol = cpu_to_le32(subvolid); + + bch2_trans_iter_exit(trans, &iter); + return 0; +} + +static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode) +{ + struct bch_fs *c = trans->c; + struct bch_inode_unpacked new_inode; + + bch2_inode_init_early(c, &new_inode); + bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL); + new_inode.bi_size = size; + new_inode.bi_inum = inum; + + return __bch2_fsck_write_inode(trans, &new_inode, snapshot); +} + +static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum) +{ + struct btree_iter iter = {}; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); + struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter); + bch2_trans_iter_exit(trans, &iter); + int ret = bkey_err(k); + if (ret) + return ret; + + return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG); +} + struct snapshots_seen_entry { u32 id; u32 equiv; @@ -1064,6 +1187,11 @@ static int check_inode(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; + if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { + ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum); + goto do_update; + } + if (fsck_err_on(ret, c, inode_bi_subvol_missing, "inode %llu:%u bi_subvol points to missing subvolume %u", @@ -1081,7 +1209,7 @@ static int check_inode(struct btree_trans *trans, do_update = true; } } - +do_update: if (do_update) { ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck updating inode"); @@ -1130,8 +1258,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal i->count = count2; if (i->count != count2) { - bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", - w->last_pos.inode, i->snapshot, i->count, count2); + bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", + w->last_pos.inode, i->snapshot, i->count, count2); return -BCH_ERR_internal_fsck_err; } @@ -1371,10 +1499,6 @@ static int check_overlapping_extents(struct btree_trans *trans, goto err; } - ret = extent_ends_at(c, extent_ends, seen, k); - if (ret) - goto err; - extent_ends->last_pos = k.k->p; err: return ret; @@ -1438,6 +1562,17 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto err; if (k.k->type != KEY_TYPE_whiteout) { + if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { + ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + inode->last_pos.inode--; + ret = -BCH_ERR_transaction_restart_nested; + goto err; + } + if (fsck_err_on(!i, c, extent_in_missing_inode, "extent in missing inode:\n %s", (printbuf_reset(&buf), @@ -1504,6 +1639,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, i->seen_this_pos = true; } + + if (k.k->type != KEY_TYPE_whiteout) { + ret = extent_ends_at(c, extent_ends, s, k); + if (ret) + goto err; + } out: err: fsck_err: @@ -1584,8 +1725,8 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ return count2; if (i->count != count2) { - bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu", - i->count, count2); + bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu", + w->last_pos.inode, i->snapshot, i->count, count2); i->count = count2; if (i->inode.bi_nlink == i->count) continue; @@ -1782,6 +1923,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); u32 parent_snapshot; + u32 new_parent_subvol = 0; u64 parent_inum; struct printbuf buf = PRINTBUF; int ret = 0; @@ -1790,6 +1932,27 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * if (ret && !bch2_err_matches(ret, ENOENT)) return ret; + if (ret || + (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { + int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); + if (ret2 && !bch2_err_matches(ret, ENOENT)) + return ret2; + } + + if (ret && + !new_parent_subvol && + (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { + /* + * Couldn't find a subvol for dirent's snapshot - but we lost + * subvols, so we need to reconstruct: + */ + ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); + if (ret) + return ret; + + parent_snapshot = d.k->p.snapshot; + } + if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, "dirent parent_subvol points to missing subvolume\n%s", (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || @@ -1798,10 +1961,10 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", parent_snapshot, (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { - u32 new_parent_subvol; - ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); - if (ret) - goto err; + if (!new_parent_subvol) { + bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); + return -BCH_ERR_fsck_repair_unimplemented; + } struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); ret = PTR_ERR_OR_ZERO(new_dirent); @@ -1847,9 +2010,16 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; + goto err; - if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol, + if (ret) { + bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); + ret = -BCH_ERR_fsck_repair_unimplemented; + ret = 0; + goto err; + } + + if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol, c, inode_bi_parent_wrong, "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", target_inum, @@ -1857,13 +2027,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * subvol_root.bi_parent_subvol = parent_subvol; ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); if (ret) - return ret; + goto err; } ret = check_dirent_target(trans, iter, d, &subvol_root, target_snapshot); if (ret) - return ret; + goto err; out: err: fsck_err: @@ -1880,7 +2050,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct snapshots_seen *s) { struct bch_fs *c = trans->c; - struct bkey_s_c_dirent d; struct inode_walker_entry *i; struct printbuf buf = PRINTBUF; struct bpos equiv; @@ -1919,6 +2088,17 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); dir->first_this_inode = false; + if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { + ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + dir->last_pos.inode--; + ret = -BCH_ERR_transaction_restart_nested; + goto err; + } + if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, "dirent in nonexisting directory:\n%s", (printbuf_reset(&buf), @@ -1953,7 +2133,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (k.k->type != KEY_TYPE_dirent) goto out; - d = bkey_s_c_to_dirent(k); + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); if (d.v->d_type == DT_SUBVOL) { ret = check_dirent_to_subvol(trans, iter, d); @@ -2098,17 +2278,21 @@ static int check_root_trans(struct btree_trans *trans) if (mustfix_fsck_err_on(ret, c, root_subvol_missing, "root subvol missing")) { - struct bkey_i_subvolume root_subvol; + struct bkey_i_subvolume *root_subvol = + bch2_trans_kmalloc(trans, sizeof(*root_subvol)); + ret = PTR_ERR_OR_ZERO(root_subvol); + if (ret) + goto err; snapshot = U32_MAX; inum = BCACHEFS_ROOT_INO; - bkey_subvolume_init(&root_subvol.k_i); - root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL; - root_subvol.v.flags = 0; - root_subvol.v.snapshot = cpu_to_le32(snapshot); - root_subvol.v.inode = cpu_to_le64(inum); - ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0); + bkey_subvolume_init(&root_subvol->k_i); + root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL; + root_subvol->v.flags = 0; + root_subvol->v.snapshot = cpu_to_le32(snapshot); + root_subvol->v.inode = cpu_to_le64(inum); + ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0); bch_err_msg(c, ret, "writing root subvol"); if (ret) goto err; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 2b5e06770ab3..ca4a066e9a54 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -552,8 +552,8 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out, prt_printf(out, "bi_sectors=%llu", inode->bi_sectors); prt_newline(out); - prt_newline(out); prt_printf(out, "bi_version=%llu", inode->bi_version); + prt_newline(out); #define x(_name, _bits) \ prt_printf(out, #_name "=%llu", (u64) inode->_name); \ diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 1baf78594cca..82f9170dab3f 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -264,6 +264,7 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, ret = 0; err: bch2_logged_op_finish(trans, op_k); + bch_err_fn(c, ret); return ret; } @@ -476,6 +477,7 @@ case LOGGED_OP_FINSERT_finish: break; } err: + bch_err_fn(c, ret); bch2_logged_op_finish(trans, op_k); bch2_trans_iter_exit(trans, &iter); return ret; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 725fcf46f631..9aa28b52ab92 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -247,7 +247,7 @@ static void journal_entry_err_msg(struct printbuf *out, if (entry) { prt_str(out, " type="); - prt_str(out, bch2_jset_entry_types[entry->type]); + bch2_prt_jset_entry_type(out, entry->type); } if (!jset) { @@ -403,7 +403,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs jset_entry_for_each_key(entry, k) { if (!first) { prt_newline(out); - prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); + bch2_prt_jset_entry_type(out, entry->type); + prt_str(out, ": "); } prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); @@ -563,9 +564,9 @@ static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c, struct jset_entry_usage *u = container_of(entry, struct jset_entry_usage, entry); - prt_printf(out, "type=%s v=%llu", - bch2_fs_usage_types[u->entry.btree_id], - le64_to_cpu(u->v)); + prt_str(out, "type="); + bch2_prt_fs_usage_type(out, u->entry.btree_id); + prt_printf(out, " v=%llu", le64_to_cpu(u->v)); } static int journal_entry_data_usage_validate(struct bch_fs *c, @@ -827,11 +828,11 @@ int bch2_journal_entry_validate(struct bch_fs *c, void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, struct jset_entry *entry) { + bch2_prt_jset_entry_type(out, entry->type); + if (entry->type < BCH_JSET_ENTRY_NR) { - prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); + prt_str(out, ": "); bch2_jset_entry_ops[entry->type].to_text(out, c, entry); - } else { - prt_printf(out, "(unknown type %u)", entry->type); } } diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index ab811c0dad26..04a577848b01 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j) track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb)) trace_and_count(c, journal_full, c); + mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin); + swap(watermark, j->watermark); if (watermark > j->watermark) journal_wake(j); diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index b5303874fc35..37a024e034d4 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -95,8 +95,7 @@ out: return ret ?: bch2_blacklist_table_initialize(c); } -static int journal_seq_blacklist_table_cmp(const void *_l, - const void *_r, size_t size) +static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r) { const struct journal_seq_blacklist_table_entry *l = _l; const struct journal_seq_blacklist_table_entry *r = _r; diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 8c053cb64ca5..b5161b5d76a0 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -134,6 +134,7 @@ enum journal_flags { JOURNAL_STARTED, JOURNAL_MAY_SKIP_FLUSH, JOURNAL_NEED_FLUSH_WRITE, + JOURNAL_SPACE_LOW, }; /* Reasons we may fail to get a journal reservation: */ diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 9fac838d123e..b82f8209041f 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -37,7 +37,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); struct bkey_buf sk; u32 restart_count = trans->restart_count; - int ret; if (!fn) return 0; @@ -45,11 +44,11 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_buf_init(&sk); bch2_bkey_buf_reassemble(&sk, c, k); - ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?: - fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count); + fn->resume(trans, sk.k); bch2_bkey_buf_exit(&sk, c); - return ret; + + return trans_was_restarted(trans, restart_count); } int bch2_resume_logged_ops(struct bch_fs *c) diff --git a/fs/bcachefs/mean_and_variance_test.c b/fs/bcachefs/mean_and_variance_test.c index db63b3f3b338..4c298e74723d 100644 --- a/fs/bcachefs/mean_and_variance_test.c +++ b/fs/bcachefs/mean_and_variance_test.c @@ -136,20 +136,8 @@ static void mean_and_variance_test_1(struct kunit *test) d, mean, stddev, weighted_mean, weighted_stddev); } -static void mean_and_variance_test_2(struct kunit *test) -{ - s64 d[] = { 100, 10, 10, 10, 10, 10, 10 }; - s64 mean[] = { 10, 10, 10, 10, 10, 10, 10 }; - s64 stddev[] = { 9, 9, 9, 9, 9, 9, 9 }; - s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 }; - s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 }; - - do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2, - d, mean, stddev, weighted_mean, weighted_stddev); -} - /* Test behaviour where we switch from one steady state to another: */ -static void mean_and_variance_test_3(struct kunit *test) +static void mean_and_variance_test_2(struct kunit *test) { s64 d[] = { 100, 100, 100, 100, 100 }; s64 mean[] = { 22, 32, 40, 46, 50 }; @@ -161,18 +149,6 @@ static void mean_and_variance_test_3(struct kunit *test) d, mean, stddev, weighted_mean, weighted_stddev); } -static void mean_and_variance_test_4(struct kunit *test) -{ - s64 d[] = { 100, 100, 100, 100, 100 }; - s64 mean[] = { 10, 11, 12, 13, 14 }; - s64 stddev[] = { 9, 13, 15, 17, 19 }; - s64 weighted_mean[] = { 32, 49, 61, 71, 78 }; - s64 weighted_stddev[] = { 38, 44, 44, 41, 38 }; - - do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2, - d, mean, stddev, weighted_mean, weighted_stddev); -} - static void mean_and_variance_fast_divpow2(struct kunit *test) { s64 i; @@ -230,8 +206,6 @@ static struct kunit_case mean_and_variance_test_cases[] = { KUNIT_CASE(mean_and_variance_weighted_advanced_test), KUNIT_CASE(mean_and_variance_test_1), KUNIT_CASE(mean_and_variance_test_2), - KUNIT_CASE(mean_and_variance_test_3), - KUNIT_CASE(mean_and_variance_test_4), {} }; diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 08ea0cfc4aef..bb068fd72465 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -7,6 +7,7 @@ #include "disk_groups.h" #include "error.h" #include "opts.h" +#include "recovery_passes.h" #include "super-io.h" #include "util.h" @@ -42,7 +43,7 @@ const char * const __bch2_btree_ids[] = { NULL }; -const char * const bch2_csum_types[] = { +static const char * const __bch2_csum_types[] = { BCH_CSUM_TYPES() NULL }; @@ -52,7 +53,7 @@ const char * const bch2_csum_opts[] = { NULL }; -const char * const __bch2_compression_types[] = { +static const char * const __bch2_compression_types[] = { BCH_COMPRESSION_TYPES() NULL }; @@ -82,18 +83,39 @@ const char * const bch2_member_states[] = { NULL }; -const char * const bch2_jset_entry_types[] = { +static const char * const __bch2_jset_entry_types[] = { BCH_JSET_ENTRY_TYPES() NULL }; -const char * const bch2_fs_usage_types[] = { +static const char * const __bch2_fs_usage_types[] = { BCH_FS_USAGE_TYPES() NULL }; #undef x +static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[], + unsigned nr, const char *type, unsigned idx) +{ + if (idx < nr) + prt_str(out, opts[idx]); + else + prt_printf(out, "(unknown %s %u)", type, idx); +} + +#define PRT_STR_OPT_BOUNDSCHECKED(name, type) \ +void bch2_prt_##name(struct printbuf *out, type t) \ +{ \ + prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\ +} + +PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type); +PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type); +PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type); +PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type); +PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type); + static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res, struct printbuf *err) { @@ -205,6 +227,9 @@ const struct bch_option bch2_opt_table[] = { #define OPT_STR(_choices) .type = BCH_OPT_STR, \ .min = 0, .max = ARRAY_SIZE(_choices), \ .choices = _choices +#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \ + .min = 0, .max = U64_MAX, \ + .choices = _choices #define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn #define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \ diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 136083c11f3a..84e452835a17 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -16,18 +16,20 @@ extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; extern const char * const __bch2_btree_ids[]; -extern const char * const bch2_csum_types[]; extern const char * const bch2_csum_opts[]; -extern const char * const __bch2_compression_types[]; extern const char * const bch2_compression_opts[]; extern const char * const bch2_str_hash_types[]; extern const char * const bch2_str_hash_opts[]; extern const char * const __bch2_data_types[]; extern const char * const bch2_member_states[]; -extern const char * const bch2_jset_entry_types[]; -extern const char * const bch2_fs_usage_types[]; extern const char * const bch2_d_types[]; +void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type); +void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type); +void bch2_prt_data_type(struct printbuf *, enum bch_data_type); +void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type); +void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type); + static inline const char *bch2_d_type_str(unsigned d_type) { return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)"; @@ -362,12 +364,17 @@ enum fsck_err_opts { OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ - NULL, "Don't replay the journal") \ - x(keep_journal, u8, \ + NULL, "Exit recovery immediately prior to journal replay")\ + x(recovery_pass_last, u8, \ + OPT_FS|OPT_MOUNT, \ + OPT_STR_NOLIMIT(bch2_recovery_passes), \ + BCH2_NO_SB_OPT, 0, \ + NULL, "Exit recovery after specified pass") \ + x(retain_recovery_info, u8, \ 0, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ - NULL, "Don't free journal entries/keys after startup")\ + NULL, "Don't free journal entries/keys, scanned btree nodes after startup")\ x(read_entire_journal, u8, \ 0, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 03f9d6afe467..0f328aba9760 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1,35 +1,31 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "backpointers.h" -#include "bkey_buf.h" #include "alloc_background.h" -#include "btree_gc.h" +#include "bkey_buf.h" #include "btree_journal_iter.h" +#include "btree_node_scan.h" #include "btree_update.h" #include "btree_update_interior.h" #include "btree_io.h" #include "buckets.h" #include "dirent.h" -#include "ec.h" #include "errcode.h" #include "error.h" #include "fs-common.h" -#include "fsck.h" #include "journal_io.h" #include "journal_reclaim.h" #include "journal_seq_blacklist.h" -#include "lru.h" #include "logged_ops.h" #include "move.h" #include "quota.h" #include "rebalance.h" #include "recovery.h" +#include "recovery_passes.h" #include "replicas.h" #include "sb-clean.h" #include "sb-downgrade.h" #include "snapshot.h" -#include "subvolume.h" #include "super-io.h" #include @@ -37,22 +33,22 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } -static bool btree_id_is_alloc(enum btree_id id) +void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { - switch (id) { - case BTREE_ID_alloc: - case BTREE_ID_backpointers: - case BTREE_ID_need_discard: - case BTREE_ID_freespace: - case BTREE_ID_bucket_gens: - return true; - default: - return false; + u64 b = BIT_ULL(btree); + + if (!(c->sb.btrees_lost_data & b)) { + bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); + + mutex_lock(&c->sb_lock); + bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); } } /* for -o reconstruct_alloc: */ -static void do_reconstruct_alloc(struct bch_fs *c) +static void bch2_reconstruct_alloc(struct bch_fs *c) { bch2_journal_log_msg(c, "dropping alloc info"); bch_info(c, "dropping and reconstructing all alloc info"); @@ -87,15 +83,17 @@ static void do_reconstruct_alloc(struct bch_fs *c) c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - struct journal_keys *keys = &c->journal_keys; - size_t src, dst; - move_gap(keys, keys->nr); - - for (src = 0, dst = 0; src < keys->nr; src++) - if (!btree_id_is_alloc(keys->data[src].btree_id)) - keys->data[dst++] = keys->data[src]; - keys->nr = keys->gap = dst; + bch2_shoot_down_journal_keys(c, BTREE_ID_alloc, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + bch2_shoot_down_journal_keys(c, BTREE_ID_freespace, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); } /* @@ -186,7 +184,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq, r->journal_seq); } -static int bch2_journal_replay(struct bch_fs *c) +int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; DARRAY(struct journal_key *) keys_sorted = { 0 }; @@ -194,6 +192,7 @@ static int bch2_journal_replay(struct bch_fs *c) u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; struct btree_trans *trans = bch2_trans_get(c); + bool immediate_flush = false; int ret = 0; if (keys->nr) { @@ -215,6 +214,13 @@ static int bch2_journal_replay(struct bch_fs *c) darray_for_each(*keys, k) { cond_resched(); + /* + * k->allocated means the key wasn't read in from the journal, + * rather it was from early repair code + */ + if (k->allocated) + immediate_flush = true; + /* Skip fastpath if we're low on space in the journal */ ret = c->journal.watermark ? -1 : commit_do(trans, NULL, NULL, @@ -266,7 +272,8 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_trans_put(trans); trans = NULL; - if (!c->opts.keep_journal) + if (!c->opts.retain_recovery_info && + c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) bch2_journal_keys_put_initial(c); replay_now_at(j, j->replay_journal_seq_end); @@ -274,6 +281,12 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_journal_set_replay_done(j); + /* if we did any repair, flush it immediately */ + if (immediate_flush) { + bch2_journal_flush_all_pins(&c->journal); + ret = bch2_journal_meta(&c->journal); + } + if (keys->nr) bch2_journal_log_msg(c, "journal replay finished"); err: @@ -423,10 +436,9 @@ static int journal_replay_early(struct bch_fs *c, static int read_btree_roots(struct bch_fs *c) { - unsigned i; int ret = 0; - for (i = 0; i < btree_id_nr_alive(c); i++) { + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (!r->alive) @@ -435,186 +447,46 @@ static int read_btree_roots(struct bch_fs *c) if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) continue; - if (r->error) { - __fsck_err(c, - btree_id_is_alloc(i) - ? FSCK_CAN_IGNORE : 0, - btree_root_bkey_invalid, - "invalid btree root %s", - bch2_btree_id_str(i)); - if (i == BTREE_ID_alloc) + if (mustfix_fsck_err_on((ret = r->error), + c, btree_root_bkey_invalid, + "invalid btree root %s", + bch2_btree_id_str(i)) || + mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)), + c, btree_root_read_error, + "error reading btree root %s l=%u: %s", + bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { + if (btree_id_is_alloc(i)) { + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); - } + r->error = 0; + } else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { + bch_info(c, "will run btree node scan"); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); + } - ret = bch2_btree_root_read(c, i, &r->key, r->level); - if (ret) { - fsck_err(c, - btree_root_read_error, - "error reading btree root %s", - bch2_btree_id_str(i)); - if (btree_id_is_alloc(i)) - c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); ret = 0; + bch2_btree_lost_data(c, i); } } - for (i = 0; i < BTREE_ID_NR; i++) { + for (unsigned i = 0; i < BTREE_ID_NR; i++) { struct btree_root *r = bch2_btree_id_root(c, i); - if (!r->b) { + if (!r->b && !r->error) { r->alive = false; r->level = 0; - bch2_btree_root_alloc(c, i); + bch2_btree_root_alloc_fake(c, i, 0); } } fsck_err: return ret; } -static int bch2_initialize_subvolumes(struct bch_fs *c) -{ - struct bkey_i_snapshot_tree root_tree; - struct bkey_i_snapshot root_snapshot; - struct bkey_i_subvolume root_volume; - int ret; - - bkey_snapshot_tree_init(&root_tree.k_i); - root_tree.k.p.offset = 1; - root_tree.v.master_subvol = cpu_to_le32(1); - root_tree.v.root_snapshot = cpu_to_le32(U32_MAX); - - bkey_snapshot_init(&root_snapshot.k_i); - root_snapshot.k.p.offset = U32_MAX; - root_snapshot.v.flags = 0; - root_snapshot.v.parent = 0; - root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL); - root_snapshot.v.tree = cpu_to_le32(1); - SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true); - - bkey_subvolume_init(&root_volume.k_i); - root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL; - root_volume.v.flags = 0; - root_volume.v.snapshot = cpu_to_le32(U32_MAX); - root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); - - ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: - bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: - bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); - bch_err_fn(c, ret); - return ret; -} - -static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) -{ - struct btree_iter iter; - struct bkey_s_c k; - struct bch_inode_unpacked inode; - int ret; - - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0); - ret = bkey_err(k); - if (ret) - return ret; - - if (!bkey_is_inode(k.k)) { - bch_err(trans->c, "root inode not found"); - ret = -BCH_ERR_ENOENT_inode; - goto err; - } - - ret = bch2_inode_unpack(k, &inode); - BUG_ON(ret); - - inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; - - ret = bch2_inode_write(trans, &iter, &inode); -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -/* set bi_subvol on root inode */ -noinline_for_stack -static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) -{ - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, - __bch2_fs_upgrade_for_subvolumes(trans)); - bch_err_fn(c, ret); - return ret; -} - -const char * const bch2_recovery_passes[] = { -#define x(_fn, ...) #_fn, - BCH_RECOVERY_PASSES() -#undef x - NULL -}; - -static int bch2_check_allocations(struct bch_fs *c) -{ - return bch2_gc(c, true, c->opts.norecovery); -} - -static int bch2_set_may_go_rw(struct bch_fs *c) -{ - struct journal_keys *keys = &c->journal_keys; - - /* - * After we go RW, the journal keys buffer can't be modified (except for - * setting journal_key->overwritten: it will be accessed by multiple - * threads - */ - move_gap(keys, keys->nr); - - set_bit(BCH_FS_may_go_rw, &c->flags); - - if (keys->nr || c->opts.fsck || !c->sb.clean) - return bch2_fs_read_write_early(c); - return 0; -} - -struct recovery_pass_fn { - int (*fn)(struct bch_fs *); - unsigned when; -}; - -static struct recovery_pass_fn recovery_pass_fns[] = { -#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, - BCH_RECOVERY_PASSES() -#undef x -}; - -u64 bch2_recovery_passes_to_stable(u64 v) -{ - static const u8 map[] = { -#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, - BCH_RECOVERY_PASSES() -#undef x - }; - - u64 ret = 0; - for (unsigned i = 0; i < ARRAY_SIZE(map); i++) - if (v & BIT_ULL(i)) - ret |= BIT_ULL(map[i]); - return ret; -} - -u64 bch2_recovery_passes_from_stable(u64 v) -{ - static const u8 map[] = { -#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n, - BCH_RECOVERY_PASSES() -#undef x - }; - - u64 ret = 0; - for (unsigned i = 0; i < ARRAY_SIZE(map); i++) - if (v & BIT_ULL(i)) - ret |= BIT_ULL(map[i]); - return ret; -} - static bool check_version_upgrade(struct bch_fs *c) { unsigned latest_version = bcachefs_metadata_version_current; @@ -687,96 +559,6 @@ static bool check_version_upgrade(struct bch_fs *c) return false; } -u64 bch2_fsck_recovery_passes(void) -{ - u64 ret = 0; - - for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) - if (recovery_pass_fns[i].when & PASS_FSCK) - ret |= BIT_ULL(i); - return ret; -} - -static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) -{ - struct recovery_pass_fn *p = recovery_pass_fns + pass; - - if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) - return false; - if (c->recovery_passes_explicit & BIT_ULL(pass)) - return true; - if ((p->when & PASS_FSCK) && c->opts.fsck) - return true; - if ((p->when & PASS_UNCLEAN) && !c->sb.clean) - return true; - if (p->when & PASS_ALWAYS) - return true; - return false; -} - -static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) -{ - struct recovery_pass_fn *p = recovery_pass_fns + pass; - int ret; - - if (!(p->when & PASS_SILENT)) - bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), - bch2_recovery_passes[pass]); - ret = p->fn(c); - if (ret) - return ret; - if (!(p->when & PASS_SILENT)) - bch2_print(c, KERN_CONT " done\n"); - - return 0; -} - -static int bch2_run_recovery_passes(struct bch_fs *c) -{ - int ret = 0; - - while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { - if (should_run_recovery_pass(c, c->curr_recovery_pass)) { - unsigned pass = c->curr_recovery_pass; - - ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || - (ret && c->curr_recovery_pass < pass)) - continue; - if (ret) - break; - - c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); - } - c->curr_recovery_pass++; - c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); - } - - return ret; -} - -int bch2_run_online_recovery_passes(struct bch_fs *c) -{ - int ret = 0; - - for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { - struct recovery_pass_fn *p = recovery_pass_fns + i; - - if (!(p->when & PASS_ONLINE)) - continue; - - ret = bch2_run_recovery_pass(c, i); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { - i = c->curr_recovery_pass; - continue; - } - if (ret) - break; - } - - return ret; -} - int bch2_fs_recovery(struct bch_fs *c) { struct bch_sb_field_clean *clean = NULL; @@ -809,24 +591,14 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (c->opts.fsck && c->opts.norecovery) { - bch_err(c, "cannot select both norecovery and fsck"); - ret = -EINVAL; - goto err; - } + if (c->opts.norecovery) + c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1; if (!c->opts.nochanges) { mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; - struct bch_sb_field_ext *ext = - bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); - if (!ext) { - ret = -BCH_ERR_ENOSPC_sb; - mutex_unlock(&c->sb_lock); - goto err; - } - if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { ext->recovery_passes_required[0] |= cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); @@ -885,7 +657,7 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { + if (!c->sb.clean || c->opts.fsck || c->opts.retain_recovery_info) { struct genradix_iter iter; struct journal_replay **i; @@ -965,7 +737,7 @@ use_clean: c->journal_replay_seq_end = blacklist_seq - 1; if (c->opts.reconstruct_alloc) - do_reconstruct_alloc(c); + bch2_reconstruct_alloc(c); zero_out_btree_mem_ptr(&c->journal_keys); @@ -1017,6 +789,12 @@ use_clean: clear_bit(BCH_FS_fsck_running, &c->flags); + /* fsync if we fixed errors */ + if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + bch2_journal_flush_all_pins(&c->journal); + bch2_journal_meta(&c->journal); + } + /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && test_bit(BCH_FS_errors_fixed, &c->flags) && @@ -1051,6 +829,7 @@ use_clean: } mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { @@ -1064,15 +843,18 @@ use_clean: write_sb = true; } - if (!test_bit(BCH_FS_error, &c->flags)) { - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - if (ext && - (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) || - !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) { - memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required)); - memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); - write_sb = true; - } + if (!test_bit(BCH_FS_error, &c->flags) && + !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) { + memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); + write_sb = true; + } + + if (c->opts.fsck && + !test_bit(BCH_FS_error, &c->flags) && + c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 && + ext->btrees_lost_data) { + ext->btrees_lost_data = 0; + write_sb = true; } if (c->opts.fsck && @@ -1113,9 +895,10 @@ use_clean: out: bch2_flush_fsck_errs(c); - if (!c->opts.keep_journal && - test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) + if (!c->opts.retain_recovery_info) { bch2_journal_keys_put_initial(c); + bch2_find_btree_nodes_exit(&c->found_btree_nodes); + } kfree(clean); if (!ret && @@ -1141,6 +924,7 @@ int bch2_fs_initialize(struct bch_fs *c) int ret; bch_notice(c, "initializing new filesystem"); + set_bit(BCH_FS_new_fs, &c->flags); mutex_lock(&c->sb_lock); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); @@ -1155,11 +939,11 @@ int bch2_fs_initialize(struct bch_fs *c) } mutex_unlock(&c->sb_lock); - c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns); + c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; set_bit(BCH_FS_may_go_rw, &c->flags); for (unsigned i = 0; i < BTREE_ID_NR; i++) - bch2_btree_root_alloc(c, i); + bch2_btree_root_alloc_fake(c, i, 0); for_each_member_device(c, ca) bch2_dev_usage_init(ca); @@ -1230,7 +1014,7 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1; + c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1; if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index 4e9d24719b2e..4bf818de1f2f 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -2,37 +2,9 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -extern const char * const bch2_recovery_passes[]; +void bch2_btree_lost_data(struct bch_fs *, enum btree_id); -u64 bch2_recovery_passes_to_stable(u64 v); -u64 bch2_recovery_passes_from_stable(u64 v); - -/* - * For when we need to rewind recovery passes and run a pass we skipped: - */ -static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c, - enum bch_recovery_pass pass) -{ - if (c->recovery_passes_explicit & BIT_ULL(pass)) - return 0; - - bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", - bch2_recovery_passes[pass], pass, - bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); - - c->recovery_passes_explicit |= BIT_ULL(pass); - - if (c->curr_recovery_pass >= pass) { - c->curr_recovery_pass = pass; - c->recovery_passes_complete &= (1ULL << pass) >> 1; - return -BCH_ERR_restart_recovery; - } else { - return 0; - } -} - -int bch2_run_online_recovery_passes(struct bch_fs *); -u64 bch2_fsck_recovery_passes(void); +int bch2_journal_replay(struct bch_fs *); int bch2_fs_recovery(struct bch_fs *); int bch2_fs_initialize(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c new file mode 100644 index 000000000000..0cec0f7d9703 --- /dev/null +++ b/fs/bcachefs/recovery_passes.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "alloc_background.h" +#include "backpointers.h" +#include "btree_gc.h" +#include "btree_node_scan.h" +#include "ec.h" +#include "fsck.h" +#include "inode.h" +#include "journal.h" +#include "lru.h" +#include "logged_ops.h" +#include "rebalance.h" +#include "recovery.h" +#include "recovery_passes.h" +#include "snapshot.h" +#include "subvolume.h" +#include "super.h" +#include "super-io.h" + +const char * const bch2_recovery_passes[] = { +#define x(_fn, ...) #_fn, + BCH_RECOVERY_PASSES() +#undef x + NULL +}; + +static int bch2_check_allocations(struct bch_fs *c) +{ + return bch2_gc(c, true, false); +} + +static int bch2_set_may_go_rw(struct bch_fs *c) +{ + struct journal_keys *keys = &c->journal_keys; + + /* + * After we go RW, the journal keys buffer can't be modified (except for + * setting journal_key->overwritten: it will be accessed by multiple + * threads + */ + move_gap(keys, keys->nr); + + set_bit(BCH_FS_may_go_rw, &c->flags); + + if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit) + return bch2_fs_read_write_early(c); + return 0; +} + +struct recovery_pass_fn { + int (*fn)(struct bch_fs *); + unsigned when; +}; + +static struct recovery_pass_fn recovery_pass_fns[] = { +#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, + BCH_RECOVERY_PASSES() +#undef x +}; + +static const u8 passes_to_stable_map[] = { +#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, + BCH_RECOVERY_PASSES() +#undef x +}; + +static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass) +{ + return passes_to_stable_map[pass]; +} + +u64 bch2_recovery_passes_to_stable(u64 v) +{ + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(passes_to_stable_map[i]); + return ret; +} + +u64 bch2_recovery_passes_from_stable(u64 v) +{ + static const u8 map[] = { +#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x + }; + + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(map[i]); + return ret; +} + +/* + * For when we need to rewind recovery passes and run a pass we skipped: + */ +int bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + if (c->recovery_passes_explicit & BIT_ULL(pass)) + return 0; + + bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", + bch2_recovery_passes[pass], pass, + bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); + + c->recovery_passes_explicit |= BIT_ULL(pass); + + if (c->curr_recovery_pass >= pass) { + c->curr_recovery_pass = pass; + c->recovery_passes_complete &= (1ULL << pass) >> 1; + return -BCH_ERR_restart_recovery; + } else { + return 0; + } +} + +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass); + + mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + if (!test_bit_le64(s, ext->recovery_passes_required)) { + __set_bit_le64(s, ext->recovery_passes_required); + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); + + return bch2_run_explicit_recovery_pass(c, pass); +} + +static void bch2_clear_recovery_pass_required(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass); + + mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + if (test_bit_le64(s, ext->recovery_passes_required)) { + __clear_bit_le64(s, ext->recovery_passes_required); + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); +} + +u64 bch2_fsck_recovery_passes(void) +{ + u64 ret = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) + if (recovery_pass_fns[i].when & PASS_FSCK) + ret |= BIT_ULL(i); + return ret; +} + +static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) +{ + struct recovery_pass_fn *p = recovery_pass_fns + pass; + + if (c->recovery_passes_explicit & BIT_ULL(pass)) + return true; + if ((p->when & PASS_FSCK) && c->opts.fsck) + return true; + if ((p->when & PASS_UNCLEAN) && !c->sb.clean) + return true; + if (p->when & PASS_ALWAYS) + return true; + return false; +} + +static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) +{ + struct recovery_pass_fn *p = recovery_pass_fns + pass; + int ret; + + if (!(p->when & PASS_SILENT)) + bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), + bch2_recovery_passes[pass]); + ret = p->fn(c); + if (ret) + return ret; + if (!(p->when & PASS_SILENT)) + bch2_print(c, KERN_CONT " done\n"); + + return 0; +} + +int bch2_run_online_recovery_passes(struct bch_fs *c) +{ + int ret = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { + struct recovery_pass_fn *p = recovery_pass_fns + i; + + if (!(p->when & PASS_ONLINE)) + continue; + + ret = bch2_run_recovery_pass(c, i); + if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { + i = c->curr_recovery_pass; + continue; + } + if (ret) + break; + } + + return ret; +} + +int bch2_run_recovery_passes(struct bch_fs *c) +{ + int ret = 0; + + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { + if (c->opts.recovery_pass_last && + c->curr_recovery_pass > c->opts.recovery_pass_last) + break; + + if (should_run_recovery_pass(c, c->curr_recovery_pass)) { + unsigned pass = c->curr_recovery_pass; + + ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); + if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || + (ret && c->curr_recovery_pass < pass)) + continue; + if (ret) + break; + + c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); + } + + c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); + + if (!test_bit(BCH_FS_error, &c->flags)) + bch2_clear_recovery_pass_required(c, c->curr_recovery_pass); + + c->curr_recovery_pass++; + } + + return ret; +} diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h new file mode 100644 index 000000000000..99b464e127b8 --- /dev/null +++ b/fs/bcachefs/recovery_passes.h @@ -0,0 +1,17 @@ +#ifndef _BCACHEFS_RECOVERY_PASSES_H +#define _BCACHEFS_RECOVERY_PASSES_H + +extern const char * const bch2_recovery_passes[]; + +u64 bch2_recovery_passes_to_stable(u64 v); +u64 bch2_recovery_passes_from_stable(u64 v); + +u64 bch2_fsck_recovery_passes(void); + +int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); + +int bch2_run_online_recovery_passes(struct bch_fs *); +int bch2_run_recovery_passes(struct bch_fs *); + +#endif /* _BCACHEFS_RECOVERY_PASSES_H */ diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_passes_types.h similarity index 91% rename from fs/bcachefs/recovery_types.h rename to fs/bcachefs/recovery_passes_types.h index 4959e95e7c74..773aea9a0080 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_passes_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_RECOVERY_TYPES_H -#define _BCACHEFS_RECOVERY_TYPES_H +#ifndef _BCACHEFS_RECOVERY_PASSES_TYPES_H +#define _BCACHEFS_RECOVERY_PASSES_TYPES_H #define PASS_SILENT BIT(0) #define PASS_FSCK BIT(1) @@ -13,6 +13,7 @@ * must never change: */ #define BCH_RECOVERY_PASSES() \ + x(scan_for_btree_nodes, 37, 0) \ x(check_topology, 4, 0) \ x(alloc_read, 0, PASS_ALWAYS) \ x(stripes_read, 1, PASS_ALWAYS) \ @@ -31,13 +32,13 @@ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ x(bucket_gens_init, 17, 0) \ + x(reconstruct_snapshots, 38, 0) \ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ x(fs_upgrade_for_subvolumes, 22, 0) \ - x(resume_logged_ops, 23, PASS_ALWAYS) \ x(check_inodes, 24, PASS_FSCK) \ x(check_extents, 25, PASS_FSCK) \ x(check_indirect_extents, 26, PASS_FSCK) \ @@ -47,6 +48,7 @@ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ x(check_nlinks, 31, PASS_FSCK) \ + x(resume_logged_ops, 23, PASS_ALWAYS) \ x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \ x(fix_reflink_p, 33, 0) \ x(set_fs_needs_rebalance, 34, 0) \ @@ -56,6 +58,7 @@ enum bch_recovery_pass { #define x(n, id, when) BCH_RECOVERY_PASS_##n, BCH_RECOVERY_PASSES() #undef x + BCH_RECOVERY_PASS_NR }; /* But we also need stable identifiers that can be used in the superblock */ @@ -65,4 +68,4 @@ enum bch_recovery_pass_stable { #undef x }; -#endif /* _BCACHEFS_RECOVERY_TYPES_H */ +#endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index c47c66c2b394..ff7864731a07 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -185,8 +185,7 @@ not_found: } else { bkey_error_init(update); update->k.p = p.k->p; - update->k.p.offset = next_idx; - update->k.size = next_idx - *idx; + update->k.size = p.k->size; set_bkey_val_u64s(&update->k, 0); } diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index cc2672c12031..678b9c20e251 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -6,12 +6,15 @@ #include "replicas.h" #include "super-io.h" +#include + static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, struct bch_replicas_cpu *); /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */ -static int bch2_memcmp(const void *l, const void *r, size_t size) +static int bch2_memcmp(const void *l, const void *r, const void *priv) { + size_t size = (size_t) priv; return memcmp(l, r, size); } @@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) { - eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL); + eytzinger0_sort_r(r->entries, r->nr, r->entry_size, + bch2_memcmp, NULL, (void *)(size_t)r->entry_size); } static void bch2_replicas_entry_v0_to_text(struct printbuf *out, @@ -228,7 +232,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, verify_replicas_entry(search); -#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size) +#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size) idx = eytzinger0_find(r->entries, r->nr, r->entry_size, entry_cmp, search); #undef entry_cmp @@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, { unsigned i; - sort_cmp_size(cpu_r->entries, - cpu_r->nr, - cpu_r->entry_size, - bch2_memcmp, NULL); + sort_r(cpu_r->entries, + cpu_r->nr, + cpu_r->entry_size, + bch2_memcmp, NULL, + (void *)(size_t)cpu_r->entry_size); for (i = 0; i < cpu_r->nr; i++) { struct bch_replicas_entry_v1 *e = diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index e4396cb0bacb..a98ef940b7a3 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -7,7 +7,7 @@ #include "bcachefs.h" #include "darray.h" -#include "recovery.h" +#include "recovery_passes.h" #include "sb-downgrade.h" #include "sb-errors.h" #include "super-io.h" @@ -51,7 +51,10 @@ BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \ x(btree_subvolume_children, \ BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \ - BCH_FSCK_ERR_subvol_children_not_set) + BCH_FSCK_ERR_subvol_children_not_set) \ + x(mi_btree_bitmap, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_btree_bitmap_not_marked) #define DOWNGRADE_TABLE() diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index 5178bf579f7c..4ca6e7b0d8aa 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -130,7 +130,7 @@ x(bucket_gens_nonzero_for_invalid_buckets, 122) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ x(need_discard_freespace_key_bad, 124) \ - x(backpointer_pos_wrong, 125) \ + x(backpointer_bucket_offset_wrong, 125) \ x(backpointer_to_missing_device, 126) \ x(backpointer_to_missing_alloc, 127) \ x(backpointer_to_missing_ptr, 128) \ @@ -265,7 +265,13 @@ x(subvol_children_bad, 257) \ x(subvol_loop, 258) \ x(subvol_unreachable, 259) \ - x(btree_node_bkey_bad_u64s, 260) + x(btree_node_bkey_bad_u64s, 260) \ + x(btree_node_topology_empty_interior_node, 261) \ + x(btree_ptr_v2_min_key_bad, 262) \ + x(btree_root_unreadable_and_scan_found_nothing, 263) \ + x(snapshot_node_missing, 264) \ + x(dup_backpointer_to_bad_csum_extent, 265) \ + x(btree_bitmap_not_marked, 266) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index eff5ce18c69c..522a969345e5 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "btree_cache.h" #include "disk_groups.h" #include "opts.h" #include "replicas.h" @@ -426,3 +427,55 @@ void bch2_dev_errors_reset(struct bch_dev *ca) bch2_write_super(c); mutex_unlock(&c->sb_lock); } + +/* + * Per member "range has btree nodes" bitmap: + * + * This is so that if we ever have to run the btree node scan to repair we don't + * have to scan full devices: + */ + +bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) +{ + bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) + if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev), + ptr->offset, btree_sectors(c))) + return false; + return true; +} + +static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, + u64 start, unsigned sectors) +{ + struct bch_member *m = __bch2_members_v2_get_mut(mi, dev); + u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); + + u64 end = start + sectors; + + int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); + if (resize > 0) { + u64 new_bitmap = 0; + + for (unsigned i = 0; i < 64; i++) + if (bitmap & BIT_ULL(i)) + new_bitmap |= BIT_ULL(i >> resize); + bitmap = new_bitmap; + m->btree_bitmap_shift += resize; + } + + for (unsigned bit = sectors >> m->btree_bitmap_shift; + bit << m->btree_bitmap_shift < end; + bit++) + bitmap |= BIT_ULL(bit); + + m->btree_allocated_bitmap = cpu_to_le64(bitmap); +} + +void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) +{ + lockdep_assert_held(&c->sb_lock); + + struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); + bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) + __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c)); +} diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index be0a94183271..b27c3e4467cf 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -3,6 +3,7 @@ #define _BCACHEFS_SB_MEMBERS_H #include "darray.h" +#include "bkey_types.h" extern char * const bch2_member_error_strs[]; @@ -220,6 +221,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) : 1, .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), .valid = bch2_member_exists(mi), + .btree_bitmap_shift = mi->btree_bitmap_shift, + .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap), }; } @@ -228,4 +231,22 @@ void bch2_sb_members_from_cpu(struct bch_fs *); void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); void bch2_dev_errors_reset(struct bch_dev *); +static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors) +{ + u64 end = start + sectors; + + if (end > 64 << ca->mi.btree_bitmap_shift) + return false; + + for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift; + bit << ca->mi.btree_bitmap_shift < end; + bit++) + if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit))) + return false; + return true; +} + +bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c); +void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c); + #endif /* _BCACHEFS_SB_MEMBERS_H */ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 39debe814bf3..544322d5c251 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -8,6 +8,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "recovery_passes.h" #include "snapshot.h" #include @@ -93,8 +94,10 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans, static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor) { - while (id && id < ancestor) - id = __snapshot_t(t, id)->parent; + while (id && id < ancestor) { + const struct snapshot_t *s = __snapshot_t(t, id); + id = s ? s->parent : 0; + } return id == ancestor; } @@ -110,6 +113,8 @@ static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancest static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) { const struct snapshot_t *s = __snapshot_t(t, id); + if (!s) + return 0; if (s->skip[2] <= ancestor) return s->skip[2]; @@ -120,6 +125,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances return s->parent; } +static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor) +{ + const struct snapshot_t *s = __snapshot_t(t, id); + if (!s) + return false; + + return test_bit(ancestor - id - 1, s->is_ancestor); +} + bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { bool ret; @@ -127,7 +141,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) rcu_read_lock(); struct snapshot_table *t = rcu_dereference(c->snapshots); - if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) { + if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) { ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor); goto out; } @@ -135,13 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) while (id && id < ancestor - IS_ANCESTOR_BITMAP) id = get_ancestor_below(t, id, ancestor); - if (id && id < ancestor) { - ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor); + ret = id && id < ancestor + ? test_ancestor_bitmap(t, id, ancestor) + : id == ancestor; - EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); - } else { - ret = id == ancestor; - } + EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); out: rcu_read_unlock(); @@ -151,36 +163,39 @@ out: static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) { size_t idx = U32_MAX - id; - size_t new_size; struct snapshot_table *new, *old; - new_size = max(16UL, roundup_pow_of_two(idx + 1)); + size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1)); + size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]); - new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL); + new = kvzalloc(new_bytes, GFP_KERNEL); if (!new) return NULL; + new->nr = new_size; + old = rcu_dereference_protected(c->snapshots, true); if (old) - memcpy(new->s, - rcu_dereference_protected(c->snapshots, true)->s, - sizeof(new->s[0]) * c->snapshot_table_size); + memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr); rcu_assign_pointer(c->snapshots, new); - c->snapshot_table_size = new_size; - kvfree_rcu_mightsleep(old); + kvfree_rcu(old, rcu); - return &rcu_dereference_protected(c->snapshots, true)->s[idx]; + return &rcu_dereference_protected(c->snapshots, + lockdep_is_held(&c->snapshot_table_lock))->s[idx]; } static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) { size_t idx = U32_MAX - id; + struct snapshot_table *table = + rcu_dereference_protected(c->snapshots, + lockdep_is_held(&c->snapshot_table_lock)); lockdep_assert_held(&c->snapshot_table_lock); - if (likely(idx < c->snapshot_table_size)) - return &rcu_dereference_protected(c->snapshots, true)->s[idx]; + if (likely(table && idx < table->nr)) + return &table->s[idx]; return __snapshot_t_mut(c, id); } @@ -567,6 +582,13 @@ static int check_snapshot_tree(struct btree_trans *trans, u32 subvol_id; ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); + bch_err_fn(c, ret); + + if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */ + ret = 0; + goto err; + } + if (ret) goto err; @@ -724,7 +746,6 @@ static int check_snapshot(struct btree_trans *trans, u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); u32 real_depth; struct printbuf buf = PRINTBUF; - bool should_have_subvol; u32 i, id; int ret = 0; @@ -770,7 +791,7 @@ static int check_snapshot(struct btree_trans *trans, } } - should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && + bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && !BCH_SNAPSHOT_DELETED(&s); if (should_have_subvol) { @@ -872,6 +893,154 @@ int bch2_check_snapshots(struct bch_fs *c) return ret; } +static int check_snapshot_exists(struct btree_trans *trans, u32 id) +{ + struct bch_fs *c = trans->c; + + if (bch2_snapshot_equiv(c, id)) + return 0; + + u32 tree_id; + int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id); + if (ret) + return ret; + + struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot)); + ret = PTR_ERR_OR_ZERO(snapshot); + if (ret) + return ret; + + bkey_snapshot_init(&snapshot->k_i); + snapshot->k.p = POS(0, id); + snapshot->v.tree = cpu_to_le32(tree_id); + snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c)); + + return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?: + bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, + bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?: + bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i)); +} + +/* Figure out which snapshot nodes belong in the same tree: */ +struct snapshot_tree_reconstruct { + enum btree_id btree; + struct bpos cur_pos; + snapshot_id_list cur_ids; + DARRAY(snapshot_id_list) trees; +}; + +static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r) +{ + darray_for_each(r->trees, i) + darray_exit(i); + darray_exit(&r->trees); + darray_exit(&r->cur_ids); +} + +static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos) +{ + return r->btree == BTREE_ID_inodes + ? r->cur_pos.offset == pos.offset + : r->cur_pos.inode == pos.inode; +} + +static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r) +{ + darray_for_each(*l, i) + if (snapshot_list_has_id(r, *i)) + return true; + return false; +} + +static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s) +{ + bool first = true; + darray_for_each(*s, i) { + if (!first) + prt_char(out, ' '); + first = false; + prt_printf(out, "%u", *i); + } +} + +static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r) +{ + if (r->cur_ids.nr) { + darray_for_each(r->trees, i) + if (snapshot_id_lists_have_common(i, &r->cur_ids)) { + int ret = snapshot_list_merge(c, i, &r->cur_ids); + if (ret) + return ret; + goto out; + } + darray_push(&r->trees, r->cur_ids); + darray_init(&r->cur_ids); + } +out: + r->cur_ids.nr = 0; + return 0; +} + +static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos) +{ + if (!same_snapshot(r, pos)) + snapshot_tree_reconstruct_next(c, r); + r->cur_pos = pos; + return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot); +} + +int bch2_reconstruct_snapshots(struct bch_fs *c) +{ + struct btree_trans *trans = bch2_trans_get(c); + struct printbuf buf = PRINTBUF; + struct snapshot_tree_reconstruct r = {}; + int ret = 0; + + for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { + if (btree_type_has_snapshots(btree)) { + r.btree = btree; + + ret = for_each_btree_key(trans, iter, btree, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({ + get_snapshot_trees(c, &r, k.k->p); + })); + if (ret) + goto err; + + snapshot_tree_reconstruct_next(c, &r); + } + } + + darray_for_each(r.trees, t) { + printbuf_reset(&buf); + snapshot_id_list_to_text(&buf, t); + + darray_for_each(*t, id) { + if (fsck_err_on(!bch2_snapshot_equiv(c, *id), + c, snapshot_node_missing, + "snapshot node %u from tree %s missing", *id, buf.buf)) { + if (t->nr > 1) { + bch_err(c, "cannot reconstruct snapshot trees with multiple nodes"); + ret = -BCH_ERR_fsck_repair_unimplemented; + goto err; + } + + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_snapshot_exists(trans, *id)); + if (ret) + goto err; + } + } + } +fsck_err: +err: + bch2_trans_put(trans); + snapshot_tree_reconstruct_exit(&r); + printbuf_exit(&buf); + bch_err_fn(c, ret); + return ret; +} + /* * Mark a snapshot as deleted, for future cleanup: */ @@ -1682,6 +1851,20 @@ int bch2_snapshots_read(struct bch_fs *c) POS_MIN, 0, k, (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); bch_err_fn(c, ret); + + /* + * It's important that we check if we need to reconstruct snapshots + * before going RW, so we mark that pass as required in the superblock - + * otherwise, we could end up deleting keys with missing snapshot nodes + * instead + */ + BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && + test_bit(BCH_FS_may_go_rw, &c->flags)); + + if (bch2_err_matches(ret, EIO) || + (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))) + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots); + return ret; } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 7c66ffc06385..b7d2fed37c4f 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -33,7 +33,11 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) { - return &t->s[U32_MAX - id]; + u32 idx = U32_MAX - id; + + return likely(t && idx < t->nr) + ? &t->s[idx] + : NULL; } static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) @@ -44,7 +48,8 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) { rcu_read_lock(); - id = snapshot_t(c, id)->tree; + const struct snapshot_t *s = snapshot_t(c, id); + id = s ? s->tree : 0; rcu_read_unlock(); return id; @@ -52,7 +57,8 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) { - return snapshot_t(c, id)->parent; + const struct snapshot_t *s = snapshot_t(c, id); + return s ? s->parent : 0; } static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) @@ -66,19 +72,19 @@ static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) { -#ifdef CONFIG_BCACHEFS_DEBUG - u32 parent = snapshot_t(c, id)->parent; + const struct snapshot_t *s = snapshot_t(c, id); + if (!s) + return 0; - if (parent && - snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1) + u32 parent = s->parent; + if (IS_ENABLED(CONFIG_BCACHEFS_DEBU) && + parent && + s->depth != snapshot_t(c, parent)->depth + 1) panic("id %u depth=%u parent %u depth=%u\n", id, snapshot_t(c, id)->depth, parent, snapshot_t(c, parent)->depth); return parent; -#else - return snapshot_t(c, id)->parent; -#endif } static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) @@ -116,7 +122,8 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) { - return snapshot_t(c, id)->equiv; + const struct snapshot_t *s = snapshot_t(c, id); + return s ? s->equiv : 0; } static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) @@ -133,38 +140,22 @@ static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) return id == bch2_snapshot_equiv(c, id); } -static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) +static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) { - const struct snapshot_t *s; - bool ret; - rcu_read_lock(); - s = snapshot_t(c, id); - ret = s->children[0]; + const struct snapshot_t *s = snapshot_t(c, id); + int ret = s ? s->children[0] : -BCH_ERR_invalid_snapshot_node; rcu_read_unlock(); return ret; } -static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) +static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) { - return !bch2_snapshot_is_internal_node(c, id); -} - -static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id) -{ - const struct snapshot_t *s; - u32 parent = __bch2_snapshot_parent(c, id); - - if (!parent) - return 0; - - s = snapshot_t(c, __bch2_snapshot_parent(c, id)); - if (id == s->children[0]) - return s->children[1]; - if (id == s->children[1]) - return s->children[0]; - return 0; + int ret = bch2_snapshot_is_internal_node(c, id); + if (ret < 0) + return ret; + return !ret; } static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent) @@ -218,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id) { - int ret; - BUG_ON(snapshot_list_has_id(s, id)); - ret = darray_push(s, id); + int ret = darray_push(s, id); if (ret) bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); return ret; } +static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id) +{ + int ret = snapshot_list_has_id(s, id) + ? 0 + : darray_push(s, id); + if (ret) + bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); + return ret; +} + +static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src) +{ + darray_for_each(*src, i) { + int ret = snapshot_list_add_nodup(c, dst, *i); + if (ret) + return ret; + } + + return 0; +} + int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, struct bch_snapshot *s); int bch2_snapshot_get_subvol(struct btree_trans *, u32, @@ -238,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32, int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *); +int bch2_reconstruct_snapshots(struct bch_fs *); int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); void bch2_delete_dead_snapshots_work(struct work_struct *); @@ -249,7 +260,7 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans, struct bpos pos) { if (!btree_type_has_snapshots(id) || - bch2_snapshot_is_leaf(trans->c, pos.snapshot)) + bch2_snapshot_is_leaf(trans->c, pos.snapshot) > 0) return 0; return __bch2_key_has_snapshot_overwrites(trans, id, pos); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index ce7aed121942..88a79c823276 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -595,6 +595,78 @@ err: return ret; } +int bch2_initialize_subvolumes(struct bch_fs *c) +{ + struct bkey_i_snapshot_tree root_tree; + struct bkey_i_snapshot root_snapshot; + struct bkey_i_subvolume root_volume; + int ret; + + bkey_snapshot_tree_init(&root_tree.k_i); + root_tree.k.p.offset = 1; + root_tree.v.master_subvol = cpu_to_le32(1); + root_tree.v.root_snapshot = cpu_to_le32(U32_MAX); + + bkey_snapshot_init(&root_snapshot.k_i); + root_snapshot.k.p.offset = U32_MAX; + root_snapshot.v.flags = 0; + root_snapshot.v.parent = 0; + root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL); + root_snapshot.v.tree = cpu_to_le32(1); + SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true); + + bkey_subvolume_init(&root_volume.k_i); + root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL; + root_volume.v.flags = 0; + root_volume.v.snapshot = cpu_to_le32(U32_MAX); + root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); + + ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: + bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: + bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); + bch_err_fn(c, ret); + return ret; +} + +static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bch_inode_unpacked inode; + int ret; + + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, + SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0); + ret = bkey_err(k); + if (ret) + return ret; + + if (!bkey_is_inode(k.k)) { + bch_err(trans->c, "root inode not found"); + ret = -BCH_ERR_ENOENT_inode; + goto err; + } + + ret = bch2_inode_unpack(k, &inode); + BUG_ON(ret); + + inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; + + ret = bch2_inode_write(trans, &iter, &inode); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +/* set bi_subvol on root inode */ +int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) +{ + int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, + __bch2_fs_upgrade_for_subvolumes(trans)); + bch_err_fn(c, ret); + return ret; +} + int bch2_fs_subvolumes_init(struct bch_fs *c) { INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index 903c05162c06..d2015d549bd2 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -37,6 +37,9 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *); int bch2_subvolume_unlink(struct btree_trans *, u32); int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool); +int bch2_initialize_subvolumes(struct bch_fs *); +int bch2_fs_upgrade_for_subvolumes(struct bch_fs *); + int bch2_fs_subvolumes_init(struct bch_fs *); #endif /* _BCACHEFS_SUBVOLUME_H */ diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h index ae644adfc391..9b10c8947828 100644 --- a/fs/bcachefs/subvolume_types.h +++ b/fs/bcachefs/subvolume_types.h @@ -20,6 +20,8 @@ struct snapshot_t { }; struct snapshot_table { + struct rcu_head rcu; + size_t nr; #ifndef RUST_BINDGEN DECLARE_FLEX_ARRAY(struct snapshot_t, s); #else diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index ad28e370b640..08ea3dbbbe97 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -8,7 +8,7 @@ #include "journal.h" #include "journal_sb.h" #include "journal_seq_blacklist.h" -#include "recovery.h" +#include "recovery_passes.h" #include "replicas.h" #include "quota.h" #include "sb-clean.h" @@ -143,7 +143,7 @@ void bch2_free_super(struct bch_sb_handle *sb) { kfree(sb->bio); if (!IS_ERR_OR_NULL(sb->s_bdev_file)) - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); kfree(sb->holder); kfree(sb->sb_name); @@ -527,9 +527,11 @@ static void bch2_sb_update(struct bch_fs *c) memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent)); struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); - if (ext) + if (ext) { le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, sizeof(c->sb.errors_silent) * 8); + c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); + } for_each_member_device(c, ca) { struct bch_member m = bch2_sb_member_get(src, ca->dev_idx); @@ -698,8 +700,11 @@ retry: return -ENOMEM; sb->sb_name = kstrdup(path, GFP_KERNEL); - if (!sb->sb_name) - return -ENOMEM; + if (!sb->sb_name) { + ret = -ENOMEM; + prt_printf(&err, "error allocating memory for sb_name"); + goto err; + } #ifndef __KERNEL__ if (opt_get(*opts, direct_io) == false) @@ -1162,6 +1167,11 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb, kfree(errors_silent); } + + prt_printf(out, "Btrees with missing data:"); + prt_tab(out); + prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data)); + prt_newline(out); } static const struct bch_sb_field_ops bch_sb_field_ops_ext = { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 1ad6e5cd9476..8daf80a38d60 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -15,6 +15,7 @@ #include "btree_gc.h" #include "btree_journal_iter.h" #include "btree_key_cache.h" +#include "btree_node_scan.h" #include "btree_update_interior.h" #include "btree_io.h" #include "btree_write_buffer.h" @@ -287,8 +288,13 @@ static void __bch2_fs_read_only(struct bch_fs *c) if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) && !test_bit(BCH_FS_emergency_ro, &c->flags)) set_bit(BCH_FS_clean_shutdown, &c->flags); + bch2_fs_journal_stop(&c->journal); + bch_info(c, "%sshutdown complete, journal seq %llu", + test_bit(BCH_FS_clean_shutdown, &c->flags) ? "" : "un", + c->journal.seq_ondisk); + /* * After stopping journal: */ @@ -365,7 +371,7 @@ void bch2_fs_read_only(struct bch_fs *c) !test_bit(BCH_FS_emergency_ro, &c->flags) && test_bit(BCH_FS_started, &c->flags) && test_bit(BCH_FS_clean_shutdown, &c->flags) && - !c->opts.norecovery) { + c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) { BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal)); BUG_ON(atomic_read(&c->btree_cache.dirty)); BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty)); @@ -510,7 +516,8 @@ err: int bch2_fs_read_write(struct bch_fs *c) { - if (c->opts.norecovery) + if (c->opts.recovery_pass_last && + c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay) return -BCH_ERR_erofs_norecovery; if (c->opts.nochanges) @@ -535,6 +542,7 @@ static void __bch2_fs_free(struct bch_fs *c) for (i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); + bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); @@ -559,6 +567,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_compress_exit(c); bch2_journal_keys_put_initial(c); + bch2_find_btree_nodes_exit(&c->found_btree_nodes); BUG_ON(atomic_read(&c->journal_keys.ref)); bch2_fs_btree_write_buffer_exit(c); percpu_free_rwsem(&c->mark_lock); @@ -1015,8 +1024,16 @@ int bch2_fs_start(struct bch_fs *c) for_each_online_member(c, ca) bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now); + struct bch_sb_field_ext *ext = + bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); mutex_unlock(&c->sb_lock); + if (!ext) { + bch_err(c, "insufficient space in superblock for sb_field_ext"); + ret = -BCH_ERR_ENOSPC_sb; + goto err; + } + for_each_rw_member(c, ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index ec784d975f66..11bcef170c2c 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -37,6 +37,8 @@ struct bch_member_cpu { u8 durability; u8 freespace_initialized; u8 valid; + u8 btree_bitmap_shift; + u64 btree_allocated_bitmap; }; #endif /* _BCACHEFS_SUPER_TYPES_H */ diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index c86a93a8d8fc..5be92fe3f4ea 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -17,7 +17,6 @@ #include "btree_iter.h" #include "btree_key_cache.h" #include "btree_update.h" -#include "btree_update_interior.h" #include "btree_gc.h" #include "buckets.h" #include "clock.h" @@ -26,6 +25,7 @@ #include "ec.h" #include "inode.h" #include "journal.h" +#include "journal_reclaim.h" #include "keylist.h" #include "move.h" #include "movinggc.h" @@ -139,6 +139,7 @@ do { \ write_attribute(trigger_gc); write_attribute(trigger_discards); write_attribute(trigger_invalidates); +write_attribute(trigger_journal_flush); write_attribute(prune_cache); write_attribute(btree_wakeup); rw_attribute(btree_gc_periodic); @@ -166,7 +167,6 @@ read_attribute(btree_write_stats); read_attribute(btree_cache_size); read_attribute(compression_stats); read_attribute(journal_debug); -read_attribute(btree_updates); read_attribute(btree_cache); read_attribute(btree_key_cache); read_attribute(stripes_heap); @@ -415,9 +415,6 @@ SHOW(bch2_fs) if (attr == &sysfs_journal_debug) bch2_journal_debug_to_text(out, &c->journal); - if (attr == &sysfs_btree_updates) - bch2_btree_updates_to_text(out, c); - if (attr == &sysfs_btree_cache) bch2_btree_cache_to_text(out, c); @@ -505,7 +502,7 @@ STORE(bch2_fs) /* Debugging: */ - if (!test_bit(BCH_FS_rw, &c->flags)) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)) return -EROFS; if (attr == &sysfs_prune_cache) { @@ -538,6 +535,11 @@ STORE(bch2_fs) if (attr == &sysfs_trigger_invalidates) bch2_do_invalidates(c); + if (attr == &sysfs_trigger_journal_flush) { + bch2_journal_flush_all_pins(&c->journal); + bch2_journal_meta(&c->journal); + } + #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -558,6 +560,7 @@ STORE(bch2_fs) size = ret; } #endif + bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); return size; } SYSFS_OPS(bch2_fs); @@ -639,7 +642,6 @@ SYSFS_OPS(bch2_fs_internal); struct attribute *bch2_fs_internal_files[] = { &sysfs_flags, &sysfs_journal_debug, - &sysfs_btree_updates, &sysfs_btree_cache, &sysfs_btree_key_cache, &sysfs_new_stripes, @@ -657,6 +659,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_gc, &sysfs_trigger_discards, &sysfs_trigger_invalidates, + &sysfs_trigger_journal_flush, &sysfs_prune_cache, &sysfs_btree_wakeup, diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index b3fe9fc57747..bfec656f94c0 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, BTREE_ITER_INTENT); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 216fadf16928..92c6ad75e702 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -707,149 +707,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) } } -static int alignment_ok(const void *base, size_t align) -{ - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || - ((unsigned long)base & (align - 1)) == 0; -} - -static void u32_swap(void *a, void *b, size_t size) -{ - u32 t = *(u32 *)a; - *(u32 *)a = *(u32 *)b; - *(u32 *)b = t; -} - -static void u64_swap(void *a, void *b, size_t size) -{ - u64 t = *(u64 *)a; - *(u64 *)a = *(u64 *)b; - *(u64 *)b = t; -} - -static void generic_swap(void *a, void *b, size_t size) -{ - char t; - - do { - t = *(char *)a; - *(char *)a++ = *(char *)b; - *(char *)b++ = t; - } while (--size > 0); -} - -static inline int do_cmp(void *base, size_t n, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - size_t l, size_t r) -{ - return cmp_func(base + inorder_to_eytzinger0(l, n) * size, - base + inorder_to_eytzinger0(r, n) * size, - size); -} - -static inline void do_swap(void *base, size_t n, size_t size, - void (*swap_func)(void *, void *, size_t), - size_t l, size_t r) -{ - swap_func(base + inorder_to_eytzinger0(l, n) * size, - base + inorder_to_eytzinger0(r, n) * size, - size); -} - -void eytzinger0_sort(void *base, size_t n, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t)) -{ - int i, c, r; - - if (!swap_func) { - if (size == 4 && alignment_ok(base, 4)) - swap_func = u32_swap; - else if (size == 8 && alignment_ok(base, 8)) - swap_func = u64_swap; - else - swap_func = generic_swap; - } - - /* heapify */ - for (i = n / 2 - 1; i >= 0; --i) { - for (r = i; r * 2 + 1 < n; r = c) { - c = r * 2 + 1; - - if (c + 1 < n && - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) - c++; - - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) - break; - - do_swap(base, n, size, swap_func, r, c); - } - } - - /* sort */ - for (i = n - 1; i > 0; --i) { - do_swap(base, n, size, swap_func, 0, i); - - for (r = 0; r * 2 + 1 < i; r = c) { - c = r * 2 + 1; - - if (c + 1 < i && - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) - c++; - - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) - break; - - do_swap(base, n, size, swap_func, r, c); - } - } -} - -void sort_cmp_size(void *base, size_t num, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t size)) -{ - /* pre-scale counters for performance */ - int i = (num/2 - 1) * size, n = num * size, c, r; - - if (!swap_func) { - if (size == 4 && alignment_ok(base, 4)) - swap_func = u32_swap; - else if (size == 8 && alignment_ok(base, 8)) - swap_func = u64_swap; - else - swap_func = generic_swap; - } - - /* heapify */ - for ( ; i >= 0; i -= size) { - for (r = i; r * 2 + size < n; r = c) { - c = r * 2 + size; - if (c < n - size && - cmp_func(base + c, base + c + size, size) < 0) - c += size; - if (cmp_func(base + r, base + c, size) >= 0) - break; - swap_func(base + r, base + c, size); - } - } - - /* sort */ - for (i = n - size; i > 0; i -= size) { - swap_func(base, base + i, size); - for (r = 0; r * 2 + size < i; r = c) { - c = r * 2 + size; - if (c < i - size && - cmp_func(base + c, base + c + size, size) < 0) - c += size; - if (cmp_func(base + r, base + c, size) >= 0) - break; - swap_func(base + r, base + c, size); - } - } -} - #if 0 void eytzinger1_test(void) { diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 175aee3074c7..5cf885b09986 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -631,10 +631,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes) memset(s + bytes, c, rem); } -void sort_cmp_size(void *base, size_t num, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t)); - /* just the memmove, doesn't update @_nr */ #define __array_insert_item(_array, _nr, _pos) \ memmove(&(_array)[(_pos) + 1], \ @@ -792,9 +788,27 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi #endif +static inline void mod_bit(long nr, volatile unsigned long *addr, bool v) +{ + if (v) + set_bit(nr, addr); + else + clear_bit(nr, addr); +} + static inline void __set_bit_le64(size_t bit, __le64 *addr) { addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64)); } +static inline void __clear_bit_le64(size_t bit, __le64 *addr) +{ + addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64)); +} + +static inline bool test_bit_le64(size_t bit, __le64 *addr) +{ + return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0; +} + #endif /* _BCACHEFS_UTIL_H */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 1920ed69279b..3314249e8674 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1359,7 +1359,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid)); SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid)); rcu_read_unlock(); - strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); + get_task_comm(psinfo->pr_fname, p); return 0; } diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5f7587ca1ca7..1e09aeea69c2 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1559,7 +1559,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * needing to allocate extents from the block group. */ used = btrfs_space_info_used(space_info, true); - if (space_info->total_bytes - block_group->length < used) { + if (space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index dd6f566a383f..121ab890bd05 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1133,6 +1133,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_record_root_in_trans(trans, node->root); + if (ret) + return ret; ret = btrfs_update_delayed_inode(trans, node->root, path, node); return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index beedd6ed64d3..257d044bca91 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3464,6 +3464,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (root_id != BTRFS_TREE_LOG_OBJECTID) { struct btrfs_ref generic_ref = { 0 }; + /* + * Assert that the extent buffer is not cleared due to + * EXTENT_BUFFER_ZONED_ZEROOUT. Please refer + * btrfs_clear_buffer_dirty() and btree_csum_one_bio() for + * detail. + */ + ASSERT(btrfs_header_bytenr(buf) != 0); + btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, buf->start, buf->len, parent, btrfs_header_owner(buf)); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7441245b1ceb..2776112dbdf8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -681,31 +681,21 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array, gfp_t extra_gfp) { + const gfp_t gfp = GFP_NOFS | extra_gfp; unsigned int allocated; for (allocated = 0; allocated < nr_pages;) { unsigned int last = allocated; - allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp, - nr_pages, page_array); - - if (allocated == nr_pages) - return 0; - - /* - * During this iteration, no page could be allocated, even - * though alloc_pages_bulk_array() falls back to alloc_page() - * if it could not bulk-allocate. So we must be out of memory. - */ - if (allocated == last) { + allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array); + if (unlikely(allocated == last)) { + /* No progress, fail and do cleanup. */ for (int i = 0; i < allocated; i++) { __free_page(page_array[i]); page_array[i] = NULL; } return -ENOMEM; } - - memalloc_retry_wait(GFP_NOFS); } return 0; } @@ -4154,7 +4144,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans, * The actual zeroout of the buffer will happen later in * btree_csum_one_bio. */ - if (btrfs_is_zoned(fs_info)) { + if (btrfs_is_zoned(fs_info) && test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { set_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags); return; } @@ -4193,6 +4183,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb) num_folios = num_extent_folios(eb); WARN_ON(atomic_read(&eb->refs) == 0); WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); + WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)); if (!was_dirty) { bool subpage = eb->fs_info->nodesize < PAGE_SIZE; @@ -4333,6 +4324,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags)) goto done; + /* + * Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above + * test_and_set_bit(EXTENT_BUFFER_READING), someone else could have + * started and finished reading the same eb. In this case, UPTODATE + * will now be set, and we shouldn't read it in again. + */ + if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) { + clear_bit(EXTENT_BUFFER_READING, &eb->bflags); + smp_mb__after_atomic(); + wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING); + return 0; + } + clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; check_buffer_tree_ref(eb); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 347ca13d15a9..445f7716f1e2 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -309,7 +309,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) btrfs_warn(fs_info, "no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu", btrfs_ino(inode), btrfs_root_id(inode->root), - start, len, gen); + start, start + len, gen); ret = -ENOENT; goto out; } @@ -318,7 +318,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) btrfs_warn(fs_info, "found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu", btrfs_ino(inode), btrfs_root_id(inode->root), - em->start, start, len, gen); + em->start, start, start + len, gen); ret = -EUCLEAN; goto out; } @@ -340,9 +340,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) em->mod_len = em->len; } - free_extent_map(em); out: write_unlock(&tree->lock); + free_extent_map(em); return ret; } @@ -629,13 +629,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info, */ ret = merge_extent_mapping(em_tree, existing, em, start); - if (ret) { + if (WARN_ON(ret)) { free_extent_map(em); *em_in = NULL; - WARN_ONCE(ret, -"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n", - existing->start, existing->len, - orig_start, orig_len, start); + btrfs_warn(fs_info, +"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu", + existing->start, extent_map_end(existing), + orig_start, orig_start + orig_len, start); } free_extent_map(existing); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 37701531eeb1..c65fe5de4022 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2533,7 +2533,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode, */ if (bits & EXTENT_CLEAR_META_RESV && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, len, false); + btrfs_delalloc_release_metadata(inode, len, true); /* For sanity tests. */ if (btrfs_is_testing(fs_info)) @@ -4503,6 +4503,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct btrfs_block_rsv block_rsv; u64 root_flags; + u64 qgroup_reserved = 0; int ret; down_write(&fs_info->subvol_sem); @@ -4547,12 +4548,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); if (ret) goto out_undead; + qgroup_reserved = block_rsv.qgroup_rsv_reserved; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_release; } + ret = btrfs_record_root_in_trans(trans, root); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_end_trans; + } + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; @@ -4611,7 +4620,9 @@ out_end_trans: ret = btrfs_end_transaction(trans); inode->i_flags |= S_DEAD; out_release: - btrfs_subvolume_release_metadata(root, &block_rsv); + btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); out_undead: if (ret) { spin_lock(&dest->root_item_lock); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 294e31edec9d..55f3ba6a831c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -613,6 +613,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap, int ret; dev_t anon_dev; u64 objectid; + u64 qgroup_reserved = 0; root_item = kzalloc(sizeof(*root_item), GFP_KERNEL); if (!root_item) @@ -650,13 +651,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap, trans_num_items, false); if (ret) goto out_new_inode_args; + qgroup_reserved = block_rsv.qgroup_rsv_reserved; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - btrfs_subvolume_release_metadata(root, &block_rsv); - goto out_new_inode_args; + goto out_release_rsv; } + ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root); + if (ret) + goto out; + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; /* Tree log can't currently deal with an inode which is a new root. */ @@ -767,9 +773,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap, out: trans->block_rsv = NULL; trans->bytes_reserved = 0; - btrfs_subvolume_release_metadata(root, &block_rsv); - btrfs_end_transaction(trans); +out_release_rsv: + btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); out_new_inode_args: btrfs_new_inode_args_destroy(&new_inode_args); out_inode: @@ -791,6 +799,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, struct btrfs_pending_snapshot *pending_snapshot; unsigned int trans_num_items; struct btrfs_trans_handle *trans; + struct btrfs_block_rsv *block_rsv; + u64 qgroup_reserved = 0; int ret; /* We do not support snapshotting right now. */ @@ -827,19 +837,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, goto free_pending; } - btrfs_init_block_rsv(&pending_snapshot->block_rsv, - BTRFS_BLOCK_RSV_TEMP); + block_rsv = &pending_snapshot->block_rsv; + btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP); /* * 1 to add dir item * 1 to add dir index * 1 to update parent inode item */ trans_num_items = create_subvol_num_items(inherit) + 3; - ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, - &pending_snapshot->block_rsv, + ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv, trans_num_items, false); if (ret) goto free_pending; + qgroup_reserved = block_rsv->qgroup_rsv_reserved; pending_snapshot->dentry = dentry; pending_snapshot->root = root; @@ -852,6 +862,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, ret = PTR_ERR(trans); goto fail; } + ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root); + if (ret) { + btrfs_end_transaction(trans); + goto fail; + } + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; trans->pending_snapshot = pending_snapshot; @@ -881,7 +898,9 @@ fail: if (ret && pending_snapshot->snap) pending_snapshot->snap->anon_dev = 0; btrfs_put_root(pending_snapshot->snap); - btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv); + btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); free_pending: if (pending_snapshot->anon_dev) free_anon_bdev(pending_snapshot->anon_dev); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5f90f0605b12..cf8820ce7aa2 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4495,6 +4495,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) BTRFS_QGROUP_RSV_META_PREALLOC); trace_qgroup_meta_convert(root, num_bytes); qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); + if (!sb_rdonly(fs_info->sb)) + add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); } /* diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 4bb538a372ce..7007f9e0c972 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -548,13 +548,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, } return ret; } - -void btrfs_subvolume_release_metadata(struct btrfs_root *root, - struct btrfs_block_rsv *rsv) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - u64 qgroup_to_release; - - btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release); - btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release); -} diff --git a/fs/btrfs/root-tree.h b/fs/btrfs/root-tree.h index 6f929cf3bd49..8f5739e732b9 100644 --- a/fs/btrfs/root-tree.h +++ b/fs/btrfs/root-tree.h @@ -18,8 +18,6 @@ struct btrfs_trans_handle; int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, bool use_global_rsv); -void btrfs_subvolume_release_metadata(struct btrfs_root *root, - struct btrfs_block_rsv *rsv); int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, u64 ref_id, u64 dirid, u64 sequence, const struct fscrypt_str *name); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c4bd0e60db59..fa25004ab04e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2812,7 +2812,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, gen = btrfs_get_last_trans_committed(fs_info); for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { - bytenr = btrfs_sb_offset(i); + ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr); + if (ret == -ENOENT) + break; + + if (ret) { + spin_lock(&sctx->stat_lock); + sctx->stat.super_errors++; + spin_unlock(&sctx->stat_lock); + continue; + } + if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->commit_total_bytes) break; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 46e8426adf4f..85f359e0e0a7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -745,14 +745,6 @@ again: h->reloc_reserved = reloc_reserved; } - /* - * Now that we have found a transaction to be a part of, convert the - * qgroup reservation from prealloc to pertrans. A different transaction - * can't race in and free our pertrans out from under us. - */ - if (qgroup_reserved) - btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); - got_it: if (!current->journal_info) current->journal_info = h; @@ -786,8 +778,15 @@ got_it: * not just freed. */ btrfs_end_transaction(h); - return ERR_PTR(ret); + goto reserve_fail; } + /* + * Now that we have found a transaction to be a part of, convert the + * qgroup reservation from prealloc to pertrans. A different transaction + * can't race in and free our pertrans out from under us. + */ + if (qgroup_reserved) + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); return h; @@ -1495,6 +1494,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + btrfs_qgroup_free_meta_all_pertrans(root); spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log(trans, root); @@ -1519,7 +1519,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) if (ret2) return ret2; spin_lock(&fs_info->fs_roots_radix_lock); - btrfs_qgroup_free_meta_all_pertrans(root); } } spin_unlock(&fs_info->fs_roots_radix_lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1dc1f1946ae0..f15591f3e54f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -692,6 +692,16 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, device->bdev = file_bdev(bdev_file); clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); + if (device->devt != device->bdev->bd_dev) { + btrfs_warn(NULL, + "device %s maj:min changed from %d:%d to %d:%d", + device->name->str, MAJOR(device->devt), + MINOR(device->devt), MAJOR(device->bdev->bd_dev), + MINOR(device->bdev->bd_dev)); + + device->devt = device->bdev->bd_dev; + } + fs_devices->open_devices++; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && device->devid != BTRFS_DEV_REPLACE_DEVID) { @@ -1174,23 +1184,30 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device; struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; + int ret = 0; list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, dev_list) { - int ret; + int ret2; - ret = btrfs_open_one_device(fs_devices, device, flags, holder); - if (ret == 0 && + ret2 = btrfs_open_one_device(fs_devices, device, flags, holder); + if (ret2 == 0 && (!latest_dev || device->generation > latest_dev->generation)) { latest_dev = device; - } else if (ret == -ENODATA) { + } else if (ret2 == -ENODATA) { fs_devices->num_devices--; list_del(&device->dev_list); btrfs_free_device(device); } + if (ret == 0 && ret2 != 0) + ret = ret2; } - if (fs_devices->open_devices == 0) + + if (fs_devices->open_devices == 0) { + if (ret) + return ret; return -EINVAL; + } fs_devices->opened = 1; fs_devices->latest_dev = latest_dev; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 5a3d5ec75c5a..4cba80b34387 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1574,11 +1574,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) if (!map) return -EINVAL; - cache->physical_map = btrfs_clone_chunk_map(map, GFP_NOFS); - if (!cache->physical_map) { - ret = -ENOMEM; - goto out; - } + cache->physical_map = map; zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); if (!zone_info) { @@ -1690,7 +1686,6 @@ out: } bitmap_free(active); kfree(zone_info); - btrfs_free_chunk_map(map); return ret; } @@ -2175,6 +2170,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ struct btrfs_chunk_map *map; const bool is_metadata = (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)); + struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int ret = 0; int i; @@ -2250,6 +2246,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ btrfs_clear_data_reloc_bg(block_group); spin_unlock(&block_group->lock); + down_read(&dev_replace->rwsem); map = block_group->physical_map; for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *device = map->stripes[i].dev; @@ -2266,13 +2263,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ zinfo->zone_size >> SECTOR_SHIFT); memalloc_nofs_restore(nofs_flags); - if (ret) + if (ret) { + up_read(&dev_replace->rwsem); return ret; + } if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) zinfo->reserved_active_zones++; btrfs_dev_clear_active_zone(device, physical); } + up_read(&dev_replace->rwsem); if (!fully_written) btrfs_dec_block_group_ro(block_group); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1340d77124ae..ee9caf7916fb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -795,8 +795,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) ihold(inode); if (wbc->sync_mode == WB_SYNC_NONE && - ceph_inode_to_fs_client(inode)->write_congested) + ceph_inode_to_fs_client(inode)->write_congested) { + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; + } wait_on_page_fscache(page); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 55051ad09c19..c4941ba245ac 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4783,13 +4783,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode) doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_unlink_delay_list); - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); /* * Fire the work immediately, because the MDS maybe diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 3ab9c268a8bb..360b686c3c67 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2504,7 +2504,7 @@ static void ceph_cap_unlink_work(struct work_struct *work) struct ceph_client *cl = mdsc->fsc->client; doutc(cl, "begin\n"); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); while (!list_empty(&mdsc->cap_unlink_delay_list)) { struct ceph_inode_info *ci; struct inode *inode; @@ -2516,15 +2516,15 @@ static void ceph_cap_unlink_work(struct work_struct *work) inode = igrab(&ci->netfs.inode); if (inode) { - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); doutc(cl, "on %p %llx.%llx\n", inode, ceph_vinop(inode)); ceph_check_caps(ci, CHECK_CAPS_FLUSH); iput(inode); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); } } - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); doutc(cl, "done\n"); } @@ -5404,7 +5404,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->cap_wait_list); spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); - spin_lock_init(&mdsc->cap_unlink_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); mdsc->last_cap_flush_tid = 1; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 03f8ff00874f..b88e80415224 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -461,9 +461,8 @@ struct ceph_mds_client { struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; /* last time we renewed our caps */ struct list_head cap_delay_list; /* caps with delayed release */ - spinlock_t cap_delay_lock; /* protects cap_delay_list */ struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */ - spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */ + spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */ struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 39e75131fd5a..9901057a15ba 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb) sb->s_mtd = NULL; } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { sync_blockdev(sb->s_bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } kfree(sbi); } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 69308fd73e4a..c0eb139adb07 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -430,7 +430,6 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) switch (mode) { case EROFS_MOUNT_DAX_ALWAYS: - warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); set_opt(&ctx->opt, DAX_ALWAYS); clear_opt(&ctx->opt, DAX_NEVER); return true; diff --git a/fs/exec.c b/fs/exec.c index ff6f26671cfc..cf1df7f16e55 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -895,6 +895,7 @@ int transfer_args_to_stack(struct linux_binprm *bprm, goto out; } + bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE; *sp_location = sp; out: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cfb8449c731f..044135796f2b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5668,7 +5668,7 @@ failed_mount: brelse(sbi->s_sbh); if (sbi->s_journal_bdev_file) { invalidate_bdev(file_bdev(sbi->s_journal_bdev_file)); - fput(sbi->s_journal_bdev_file); + bdev_fput(sbi->s_journal_bdev_file); } out_fail: invalidate_bdev(sb->s_bdev); @@ -5913,7 +5913,7 @@ static struct file *ext4_get_journal_blkdev(struct super_block *sb, out_bh: brelse(bh); out_bdev: - fput(bdev_file); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -5952,7 +5952,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb, out_journal: jbd2_journal_destroy(journal); out_bdev: - fput(bdev_file); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -7327,7 +7327,7 @@ static void ext4_kill_sb(struct super_block *sb) kill_block_super(sb); if (bdev_file) - fput(bdev_file); + bdev_fput(bdev_file); } static struct file_system_type ext4_fs_type = { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6867f26f141..a4bc26dfdb1a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1558,7 +1558,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { if (i > 0) - fput(FDEV(i).bdev_file); + bdev_fput(FDEV(i).bdev_file); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b6cad106c37e..0b2da7b7e2ad 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -310,6 +310,10 @@ struct cuse_init_args { /** * cuse_process_init_reply - finish initializing CUSE channel * + * @fm: The fuse mount information containing the CUSE connection. + * @args: The arguments passed to the init reply. + * @error: The error code signifying if any error occurred during the process. + * * This function creates the character device and sets up all the * required data structures for it. Please read the comment at the * top of this file for high level overview. diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4a6df591add6..2b0d4781f394 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1321,6 +1321,7 @@ retry: err = fuse_do_statx(inode, file, stat); if (err == -ENOSYS) { fc->no_statx = 1; + err = 0; goto retry; } } else { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a56e7bffd000..b57ce4157640 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1362,7 +1362,7 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from, bool *exclusive) { struct inode *inode = file_inode(iocb->ki_filp); - struct fuse_file *ff = iocb->ki_filp->private_data; + struct fuse_inode *fi = get_fuse_inode(inode); *exclusive = fuse_dio_wr_exclusive_lock(iocb, from); if (*exclusive) { @@ -1377,7 +1377,7 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from, * have raced, so check it again. */ if (fuse_io_past_eof(iocb, from) || - fuse_file_uncached_io_start(inode, ff, NULL) != 0) { + fuse_inode_uncached_io_start(fi, NULL) != 0) { inode_unlock_shared(inode); inode_lock(inode); *exclusive = true; @@ -1388,13 +1388,13 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from, static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive) { struct inode *inode = file_inode(iocb->ki_filp); - struct fuse_file *ff = iocb->ki_filp->private_data; + struct fuse_inode *fi = get_fuse_inode(inode); if (exclusive) { inode_unlock(inode); } else { /* Allow opens in caching mode after last parallel dio end */ - fuse_file_uncached_io_end(inode, ff); + fuse_inode_uncached_io_end(fi); inode_unlock_shared(inode); } } @@ -2574,8 +2574,10 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) * First mmap of direct_io file enters caching inode io mode. * Also waits for parallel dio writers to go into serial mode * (exclusive instead of shared lock). + * After first mmap, the inode stays in caching io mode until + * the direct_io file release. */ - rc = fuse_file_cached_io_start(inode, ff); + rc = fuse_file_cached_io_open(inode, ff); if (rc) return rc; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b24084b60864..f23919610313 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1394,9 +1394,10 @@ int fuse_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); /* iomode.c */ -int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff); -int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb); -void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff); +int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff); +int fuse_inode_uncached_io_start(struct fuse_inode *fi, + struct fuse_backing *fb); +void fuse_inode_uncached_io_end(struct fuse_inode *fi); int fuse_file_io_open(struct file *file, struct inode *inode); void fuse_file_io_release(struct fuse_file *ff, struct inode *inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3a5d88878335..99e44ea7d875 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -175,6 +175,7 @@ static void fuse_evict_inode(struct inode *inode) } } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { + WARN_ON(fi->iocachectr != 0); WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c index c653ddcf0578..c99e285f3183 100644 --- a/fs/fuse/iomode.c +++ b/fs/fuse/iomode.c @@ -21,12 +21,13 @@ static inline bool fuse_is_io_cache_wait(struct fuse_inode *fi) } /* - * Start cached io mode. + * Called on cached file open() and on first mmap() of direct_io file. + * Takes cached_io inode mode reference to be dropped on file release. * * Blocks new parallel dio writes and waits for the in-progress parallel dio * writes to complete. */ -int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff) +int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -67,10 +68,9 @@ int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff) return 0; } -static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff) +static void fuse_file_cached_io_release(struct fuse_file *ff, + struct fuse_inode *fi) { - struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fi->lock); WARN_ON(fi->iocachectr <= 0); WARN_ON(ff->iomode != IOM_CACHED); @@ -82,16 +82,15 @@ static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff) } /* Start strictly uncached io mode where cache access is not allowed */ -int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb) +int fuse_inode_uncached_io_start(struct fuse_inode *fi, struct fuse_backing *fb) { - struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_backing *oldfb; int err = 0; spin_lock(&fi->lock); /* deny conflicting backing files on same fuse inode */ oldfb = fuse_inode_backing(fi); - if (oldfb && oldfb != fb) { + if (fb && oldfb && oldfb != fb) { err = -EBUSY; goto unlock; } @@ -99,12 +98,10 @@ int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struc err = -ETXTBSY; goto unlock; } - WARN_ON(ff->iomode != IOM_NONE); fi->iocachectr--; - ff->iomode = IOM_UNCACHED; /* fuse inode holds a single refcount of backing file */ - if (!oldfb) { + if (fb && !oldfb) { oldfb = fuse_inode_backing_set(fi, fb); WARN_ON_ONCE(oldfb != NULL); } else { @@ -115,15 +112,29 @@ unlock: return err; } -void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff) +/* Takes uncached_io inode mode reference to be dropped on file release */ +static int fuse_file_uncached_io_open(struct inode *inode, + struct fuse_file *ff, + struct fuse_backing *fb) { struct fuse_inode *fi = get_fuse_inode(inode); + int err; + + err = fuse_inode_uncached_io_start(fi, fb); + if (err) + return err; + + WARN_ON(ff->iomode != IOM_NONE); + ff->iomode = IOM_UNCACHED; + return 0; +} + +void fuse_inode_uncached_io_end(struct fuse_inode *fi) +{ struct fuse_backing *oldfb = NULL; spin_lock(&fi->lock); WARN_ON(fi->iocachectr >= 0); - WARN_ON(ff->iomode != IOM_UNCACHED); - ff->iomode = IOM_NONE; fi->iocachectr++; if (!fi->iocachectr) { wake_up(&fi->direct_io_waitq); @@ -134,6 +145,15 @@ void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff) fuse_backing_put(oldfb); } +/* Drop uncached_io reference from passthrough open */ +static void fuse_file_uncached_io_release(struct fuse_file *ff, + struct fuse_inode *fi) +{ + WARN_ON(ff->iomode != IOM_UNCACHED); + ff->iomode = IOM_NONE; + fuse_inode_uncached_io_end(fi); +} + /* * Open flags that are allowed in combination with FOPEN_PASSTHROUGH. * A combination of FOPEN_PASSTHROUGH and FOPEN_DIRECT_IO means that read/write @@ -163,7 +183,7 @@ static int fuse_file_passthrough_open(struct inode *inode, struct file *file) return PTR_ERR(fb); /* First passthrough file open denies caching inode io mode */ - err = fuse_file_uncached_io_start(inode, ff, fb); + err = fuse_file_uncached_io_open(inode, ff, fb); if (!err) return 0; @@ -216,7 +236,7 @@ int fuse_file_io_open(struct file *file, struct inode *inode) if (ff->open_flags & FOPEN_PASSTHROUGH) err = fuse_file_passthrough_open(inode, file); else - err = fuse_file_cached_io_start(inode, ff); + err = fuse_file_cached_io_open(inode, ff); if (err) goto fail; @@ -236,8 +256,10 @@ fail: /* No more pending io and no new io possible to inode via open/mmapped file */ void fuse_file_io_release(struct fuse_file *ff, struct inode *inode) { + struct fuse_inode *fi = get_fuse_inode(inode); + /* - * Last parallel dio close allows caching inode io mode. + * Last passthrough file close allows caching inode io mode. * Last caching file close exits caching inode io mode. */ switch (ff->iomode) { @@ -245,10 +267,10 @@ void fuse_file_io_release(struct fuse_file *ff, struct inode *inode) /* Nothing to do */ break; case IOM_UNCACHED: - fuse_file_uncached_io_end(inode, ff); + fuse_file_uncached_io_release(ff, fi); break; case IOM_CACHED: - fuse_file_cached_io_end(inode, ff); + fuse_file_cached_io_release(ff, fi); break; } } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 789af5c8fade..aa1626955b2c 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1718,7 +1718,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; - u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift; + unsigned int bsize = 1 << bsize_shift; + u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, end_aligned; @@ -1729,7 +1730,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) u64 prev_bnr = 0; __be64 *start, *end; - if (offset >= maxsize) { + if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated metadata; * there are no blocks to deallocate. diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 73389c68e251..9609349e92e5 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1141,7 +1141,7 @@ journal_found: lbmLogShutdown(log); close: /* close external log device */ - fput(bdev_file); + bdev_fput(bdev_file); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb) bdev_file = log->bdev_file; rc = lmLogShutdown(log); - fput(bdev_file); + bdev_fput(bdev_file); kfree(log); diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e9df2f87072c..8502ef68459b 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -636,11 +636,18 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) * each file a separate locking class. Let's differentiate on * whether the file has mmap or not for now. * - * Both paths of the branch look the same. They're supposed to + * For similar reasons, writable and readonly files are given different + * lockdep key, because the writable file /sys/power/resume may call vfs + * lookup helpers for arbitrary paths and readonly files can be read by + * overlayfs from vfs helpers when sysfs is a lower layer of overalyfs. + * + * All three cases look the same. They're supposed to * look that way and give @of->mutex different static lockdep keys. */ if (has_mmap) mutex_init(&of->mutex); + else if (file->f_mode & FMODE_WRITE) + mutex_init(&of->mutex); else mutex_init(&of->mutex); diff --git a/fs/namei.c b/fs/namei.c index ceb9ddf8dfdd..c5b2a25be7d0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4050,6 +4050,8 @@ retry: case 0: case S_IFREG: error = vfs_create(idmap, path.dentry->d_inode, dentry, mode, true); + if (!error) + security_path_post_mknod(idmap, dentry); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, @@ -4060,11 +4062,6 @@ retry: dentry, mode, 0); break; } - - if (error) - goto out2; - - security_path_post_mknod(idmap, dentry); out2: done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1a93c7fcf76c..84d4093ca713 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3042,12 +3042,9 @@ static void nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - spin_lock(&nn->client_lock); clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); - put_client_renew_locked(clp); - spin_unlock(&nn->client_lock); + drop_client(clp); } static int @@ -3831,15 +3828,20 @@ nfsd4_create_session(struct svc_rqst *rqstp, else cs_slot = &unconf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); - if (status) { - if (status == nfserr_replay_cache) { - status = nfsd4_replay_create_session(cr_ses, cs_slot); - goto out_free_conn; - } + switch (status) { + case nfs_ok: + cs_slot->sl_seqid++; + cr_ses->seqid = cs_slot->sl_seqid; + break; + case nfserr_replay_cache: + status = nfsd4_replay_create_session(cr_ses, cs_slot); + fallthrough; + case nfserr_jukebox: + /* The server MUST NOT cache NFS4ERR_DELAY */ + goto out_free_conn; + default: goto out_cache_error; } - cs_slot->sl_seqid++; - cr_ses->seqid = cs_slot->sl_seqid; /* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */ if (conf) { @@ -3859,10 +3861,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = mark_client_expired_locked(old); - if (status) { - old = NULL; - goto out_cache_error; - } + if (status) + goto out_expired_error; trace_nfsd_clid_replaced(&old->cl_clientid); } move_to_confirmed(unconf); @@ -3894,6 +3894,17 @@ nfsd4_create_session(struct svc_rqst *rqstp, expire_client(old); return status; +out_expired_error: + old = NULL; + /* + * Revert the slot seq_nr change so the server will process + * the client's resend instead of returning a cached response. + */ + if (status == nfserr_jukebox) { + cs_slot->sl_seqid--; + cr_ses->seqid = cs_slot->sl_seqid; + goto out_free_conn; + } out_cache_error: nfsd4_cache_create_session(cr_ses, cs_slot, status); out_free_conn: @@ -6602,7 +6613,7 @@ deleg_reaper(struct nfsd_net *nn) list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ - atomic_inc(&clp->cl_rpc_users); + kref_get(&clp->cl_nfsdfs.cl_ref); set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); clp->cl_ra_time = ktime_get_boottime_seconds(); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fac938f563ad..1955481832e0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3490,11 +3490,13 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct dentry *dentry, const u32 *bmval, int ignore_crossmnt) { + DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); struct nfsd4_fattr_args args; struct svc_fh *tempfh = NULL; int starting_len = xdr->buf->len; __be32 *attrlen_p, status; int attrlen_offset; + u32 attrmask[3]; int err; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; @@ -3502,10 +3504,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, .mnt = exp->ex_path.mnt, .dentry = dentry, }; - union { - u32 attrmask[3]; - unsigned long mask[2]; - } u; unsigned long bit; bool file_modified = false; u64 size = 0; @@ -3521,20 +3519,19 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, /* * Make a local copy of the attribute bitmap that can be modified. */ - memset(&u, 0, sizeof(u)); - u.attrmask[0] = bmval[0]; - u.attrmask[1] = bmval[1]; - u.attrmask[2] = bmval[2]; + attrmask[0] = bmval[0]; + attrmask[1] = bmval[1]; + attrmask[2] = bmval[2]; args.rdattr_err = 0; if (exp->ex_fslocs.migrated) { - status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1], - &u.attrmask[2], &args.rdattr_err); + status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1], + &attrmask[2], &args.rdattr_err); if (status) goto out; } args.size = 0; - if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { + if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry), &file_modified, &size); if (status) @@ -3553,16 +3550,16 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (!(args.stat.result_mask & STATX_BTIME)) /* underlying FS does not offer btime so we can't share it */ - u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE; - if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE; + if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || - (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | + (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &args.statfs); if (err) goto out_nfserr; } - if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && + if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); status = nfserr_jukebox; @@ -3577,10 +3574,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.fhp = fhp; args.acl = NULL; - if (u.attrmask[0] & FATTR4_WORD0_ACL) { + if (attrmask[0] & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl); if (err == -EOPNOTSUPP) - u.attrmask[0] &= ~FATTR4_WORD0_ACL; + attrmask[0] &= ~FATTR4_WORD0_ACL; else if (err == -EINVAL) { status = nfserr_attrnotsupp; goto out; @@ -3592,17 +3589,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, #ifdef CONFIG_NFSD_V4_SECURITY_LABEL args.context = NULL; - if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) || - u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) || + attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { if (exp->ex_flags & NFSEXP_SECURITY_LABEL) err = security_inode_getsecctx(d_inode(dentry), &args.context, &args.contextlen); else err = -EOPNOTSUPP; args.contextsupport = (err == 0); - if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) { + if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) { if (err == -EOPNOTSUPP) - u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; else if (err) goto out_nfserr; } @@ -3610,8 +3607,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ /* attrmask */ - status = nfsd4_encode_bitmap4(xdr, u.attrmask[0], - u.attrmask[1], u.attrmask[2]); + status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1], + attrmask[2]); if (status) goto out; @@ -3620,7 +3617,9 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); if (!attrlen_p) goto out_resource; - for_each_set_bit(bit, (const unsigned long *)&u.mask, + bitmap_from_arr32(attr_bitmap, attrmask, + ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); + for_each_set_bit(bit, attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) { status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args); if (status != nfs_ok) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6a9464262fae..2e41eb4c3cec 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1852,7 +1852,7 @@ retry: trap = lock_rename(tdentry, fdentry); if (IS_ERR(trap)) { err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; - goto out; + goto out_want_write; } err = fh_fill_pre_attrs(ffhp); if (err != nfs_ok) @@ -1922,6 +1922,7 @@ retry: } out_unlock: unlock_rename(tdentry, fdentry); +out_want_write: fh_drop_write(ffhp); /* diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index bc846b904b68..aee40db7a036 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = { #define S_SHIFT 12 static unsigned char -nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { +nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, diff --git a/fs/proc/Makefile b/fs/proc/Makefile index bd08616ed8ba..7b4db9c56e6a 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -5,7 +5,7 @@ obj-y += proc.o -CFLAGS_task_mmu.o += $(call cc-option,-Wno-override-init,) +CFLAGS_task_mmu.o += -Wno-override-init proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := task_mmu.o diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c index 902b326e1e56..87dcaae32ff8 100644 --- a/fs/proc/bootconfig.c +++ b/fs/proc/bootconfig.c @@ -62,12 +62,12 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size) break; dst += ret; } - if (ret >= 0 && boot_command_line[0]) { - ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n", - boot_command_line); - if (ret > 0) - dst += ret; - } + } + if (cmdline_has_extra_options() && ret >= 0 && boot_command_line[0]) { + ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n", + boot_command_line); + if (ret > 0) + dst += ret; } out: kfree(key); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 6474529c4253..e539ccd39e1e 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2589,7 +2589,7 @@ static void journal_list_init(struct super_block *sb) static void release_journal_dev(struct reiserfs_journal *journal) { if (journal->j_bdev_file) { - fput(journal->j_bdev_file); + bdev_fput(journal->j_bdev_file); journal->j_bdev_file = NULL; } } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 2be227532f39..2cbb92462074 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb) #ifdef CONFIG_ROMFS_ON_BLOCK if (sb->s_bdev) { sync_blockdev(sb->s_bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } #endif } diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index a0017724d523..0ff2491c311d 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -417,6 +417,7 @@ smb2_close_cached_fid(struct kref *ref) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); + int rc; spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->on_list) { @@ -430,9 +431,10 @@ smb2_close_cached_fid(struct kref *ref) cfid->dentry = NULL; if (cfid->is_open) { - SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, + rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, cfid->fid.volatile_fid); - atomic_dec(&cfid->tcon->num_remote_opens); + if (rc) /* should we retry on -EBUSY or -EAGAIN? */ + cifs_dbg(VFS, "close cached dir rc %d\n", rc); } free_cached_dir(cfid); diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 226d4835c92d..c71ae5c04306 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); list_for_each_entry(cfile, &tcon->openFileList, tlist) { @@ -676,6 +678,8 @@ static ssize_t cifs_stats_proc_write(struct file *file, } #endif /* CONFIG_CIFS_STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { atomic_set(&tcon->num_smbs_sent, 0); spin_lock(&tcon->stat_lock); @@ -755,6 +759,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) } #endif /* STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { i++; seq_printf(m, "\n%d) %s", i, tcon->tree_name); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index aa6f1ecb7c0e..d41eedbff674 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -156,6 +156,7 @@ struct workqueue_struct *decrypt_wq; struct workqueue_struct *fileinfo_put_wq; struct workqueue_struct *cifsoplockd_wq; struct workqueue_struct *deferredclose_wq; +struct workqueue_struct *serverclose_wq; __u32 cifs_lock_secret; /* @@ -1888,6 +1889,13 @@ init_cifs(void) goto out_destroy_cifsoplockd_wq; } + serverclose_wq = alloc_workqueue("serverclose", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!serverclose_wq) { + rc = -ENOMEM; + goto out_destroy_serverclose_wq; + } + rc = cifs_init_inodecache(); if (rc) goto out_destroy_deferredclose_wq; @@ -1962,6 +1970,8 @@ out_destroy_decrypt_wq: destroy_workqueue(decrypt_wq); out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); +out_destroy_serverclose_wq: + destroy_workqueue(serverclose_wq); out_clean_proc: cifs_proc_clean(); return rc; @@ -1991,6 +2001,7 @@ exit_cifs(void) destroy_workqueue(cifsoplockd_wq); destroy_workqueue(decrypt_wq); destroy_workqueue(fileinfo_put_wq); + destroy_workqueue(serverclose_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7ed9d05f6890..d6669ce4ae87 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -442,10 +442,10 @@ struct smb_version_operations { /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ - void (*close)(const unsigned int, struct cifs_tcon *, + int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* close a file, returning file attributes and timestamps */ - void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, + int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *pfile_info); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); @@ -1077,6 +1077,7 @@ struct cifs_ses { and after mount option parsing we fill it */ char *domainName; char *password; + char *password2; /* When key rotation used, new password may be set before it expires */ char workstation_name[CIFS_MAX_WORKSTATION_LEN]; struct session_key auth_key; struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ @@ -1281,7 +1282,6 @@ struct cifs_tcon { struct cached_fids *cfids; /* BB add field for back pointer to sb struct(s)? */ #ifdef CONFIG_CIFS_DFS_UPCALL - struct list_head dfs_ses_list; struct delayed_work dfs_cache_work; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ @@ -1440,6 +1440,7 @@ struct cifsFileInfo { bool swapfile:1; bool oplock_break_cancelled:1; bool status_file_deleted:1; /* file has been deleted */ + bool offload:1; /* offload final part of _put to a wq */ unsigned int oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; @@ -1448,6 +1449,7 @@ struct cifsFileInfo { struct cifs_search_info srch_inf; struct work_struct oplock_break; /* work for oplock breaks */ struct work_struct put; /* work for the final part of _put */ + struct work_struct serverclose; /* work for serverclose */ struct delayed_work deferred; bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ char *symlink_target; @@ -1804,7 +1806,6 @@ struct cifs_mount_ctx { struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct list_head dfs_ses_list; }; static inline void __free_dfs_info_param(struct dfs_info3_param *param) @@ -2105,6 +2106,7 @@ extern struct workqueue_struct *decrypt_wq; extern struct workqueue_struct *fileinfo_put_wq; extern struct workqueue_struct *cifsoplockd_wq; extern struct workqueue_struct *deferredclose_wq; +extern struct workqueue_struct *serverclose_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_sm_req_poolp; @@ -2324,4 +2326,14 @@ struct smb2_compound_vars { struct kvec ea_iov; }; +static inline bool cifs_ses_exiting(struct cifs_ses *ses) +{ + bool ret; + + spin_lock(&ses->ses_lock); + ret = ses->ses_status == SES_EXITING; + spin_unlock(&ses->ses_lock); + return ret; +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 0723e1b57256..8e0a348f1f66 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -725,31 +725,31 @@ struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon); void cifs_put_tcon_super(struct super_block *sb); int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry); -/* Put references of @ses and @ses->dfs_root_ses */ +/* Put references of @ses and its children */ static inline void cifs_put_smb_ses(struct cifs_ses *ses) { - struct cifs_ses *rses = ses->dfs_root_ses; + struct cifs_ses *next; - __cifs_put_smb_ses(ses); - if (rses) - __cifs_put_smb_ses(rses); + do { + next = ses->dfs_root_ses; + __cifs_put_smb_ses(ses); + } while ((ses = next)); } -/* Get an active reference of @ses and @ses->dfs_root_ses. +/* Get an active reference of @ses and its children. * * NOTE: make sure to call this function when incrementing reference count of * @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses) * will also get its reference count incremented. * - * cifs_put_smb_ses() will put both references, so call it when you're done. + * cifs_put_smb_ses() will put all references, so call it when you're done. */ static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses) { lockdep_assert_held(&cifs_tcp_ses_lock); - ses->ses_count++; - if (ses->dfs_root_ses) - ses->dfs_root_ses->ses_count++; + for (; ses; ses = ses->dfs_root_ses) + ses->ses_count++; } static inline bool dfs_src_pathname_equal(const char *s1, const char *s2) diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 5aee55551573..23b5709ddc31 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -5854,10 +5854,8 @@ SetEARetry: parm_data->list.EA_flags = 0; /* we checked above that name len is less than 255 */ parm_data->list.name_len = (__u8)name_len; - /* EA names are always ASCII */ - if (ea_name) - strncpy(parm_data->list.name, ea_name, name_len); - parm_data->list.name[name_len] = '\0'; + /* EA names are always ASCII and NUL-terminated */ + strscpy(parm_data->list.name, ea_name ?: "", name_len + 1); parm_data->list.value_len = cpu_to_le16(ea_value_len); /* caller ensures that ea_value_len is less than 64K but we need to ensure that it fits within the smb */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 9b85b5341822..4e35970681bf 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -175,6 +175,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { if (!ses->chans[i].server) @@ -232,7 +234,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { - /* check if iface is still active */ + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); + continue; + } + spin_unlock(&ses->ses_lock); + spin_lock(&ses->chan_lock); if (cifs_ses_get_chan_index(ses, server) == CIFS_INVAL_CHAN_INDEX) { @@ -1860,6 +1868,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) ctx->sectype != ses->sectype) return 0; + if (ctx->dfs_root_ses != ses->dfs_root_ses) + return 0; + /* * If an existing session is limited to less channels than * requested, it should not be reused @@ -1963,31 +1974,6 @@ out: return rc; } -/** - * cifs_free_ipc - helper to release the session IPC tcon - * @ses: smb session to unmount the IPC from - * - * Needs to be called everytime a session is destroyed. - * - * On session close, the IPC is closed and the server must release all tcons of the session. - * No need to send a tree disconnect here. - * - * Besides, it will make the server to not close durable and resilient files on session close, as - * specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request. - */ -static int -cifs_free_ipc(struct cifs_ses *ses) -{ - struct cifs_tcon *tcon = ses->tcon_ipc; - - if (tcon == NULL) - return 0; - - tconInfoFree(tcon); - ses->tcon_ipc = NULL; - return 0; -} - static struct cifs_ses * cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { @@ -2019,48 +2005,52 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) void __cifs_put_smb_ses(struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; + struct cifs_tcon *tcon; unsigned int xid; size_t i; + bool do_logoff; int rc; - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_EXITING) { - spin_unlock(&ses->ses_lock); - return; - } - spin_unlock(&ses->ses_lock); - - cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count); - cifs_dbg(FYI, - "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE"); - spin_lock(&cifs_tcp_ses_lock); - if (--ses->ses_count > 0) { + spin_lock(&ses->ses_lock); + cifs_dbg(FYI, "%s: id=0x%llx ses_count=%d ses_status=%u ipc=%s\n", + __func__, ses->Suid, ses->ses_count, ses->ses_status, + ses->tcon_ipc ? ses->tcon_ipc->tree_name : "none"); + if (ses->ses_status == SES_EXITING || --ses->ses_count > 0) { + spin_unlock(&ses->ses_lock); spin_unlock(&cifs_tcp_ses_lock); return; } - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_GOOD) - ses->ses_status = SES_EXITING; - spin_unlock(&ses->ses_lock); - spin_unlock(&cifs_tcp_ses_lock); - /* ses_count can never go negative */ WARN_ON(ses->ses_count < 0); - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_EXITING && server->ops->logoff) { - spin_unlock(&ses->ses_lock); - cifs_free_ipc(ses); + spin_lock(&ses->chan_lock); + cifs_chan_clear_need_reconnect(ses, server); + spin_unlock(&ses->chan_lock); + + do_logoff = ses->ses_status == SES_GOOD && server->ops->logoff; + ses->ses_status = SES_EXITING; + tcon = ses->tcon_ipc; + ses->tcon_ipc = NULL; + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + + /* + * On session close, the IPC is closed and the server must release all + * tcons of the session. No need to send a tree disconnect here. + * + * Besides, it will make the server to not close durable and resilient + * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an + * SMB2 LOGOFF Request. + */ + tconInfoFree(tcon); + if (do_logoff) { xid = get_xid(); rc = server->ops->logoff(xid, ses); if (rc) cifs_server_dbg(VFS, "%s: Session Logoff failure rc=%d\n", __func__, rc); _free_xid(xid); - } else { - spin_unlock(&ses->ses_lock); - cifs_free_ipc(ses); } spin_lock(&cifs_tcp_ses_lock); @@ -2193,6 +2183,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) } ++delim; + /* BB consider adding support for password2 (Key Rotation) for multiuser in future */ ctx->password = kstrndup(delim, len, GFP_KERNEL); if (!ctx->password) { cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n", @@ -2216,6 +2207,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) kfree(ctx->username); ctx->username = NULL; kfree_sensitive(ctx->password); + /* no need to free ctx->password2 since not allocated in this path */ ctx->password = NULL; goto out_key_put; } @@ -2327,6 +2319,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) if (!ses->password) goto get_ses_fail; } + /* ctx->password freed at unmount */ + if (ctx->password2) { + ses->password2 = kstrdup(ctx->password2, GFP_KERNEL); + if (!ses->password2) + goto get_ses_fail; + } if (ctx->domainname) { ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL); if (!ses->domainName) @@ -2373,9 +2371,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) * need to lock before changing something in the session. */ spin_lock(&cifs_tcp_ses_lock); + if (ctx->dfs_root_ses) + cifs_smb_ses_inc_refcount(ctx->dfs_root_ses); ses->dfs_root_ses = ctx->dfs_root_ses; - if (ses->dfs_root_ses) - ses->dfs_root_ses->ses_count++; list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -3326,6 +3324,9 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) cifs_put_smb_ses(mnt_ctx->ses); else if (mnt_ctx->server) cifs_put_tcp_session(mnt_ctx->server, 0); + mnt_ctx->ses = NULL; + mnt_ctx->tcon = NULL; + mnt_ctx->server = NULL; mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; free_xid(mnt_ctx->xid); } @@ -3604,8 +3605,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) bool isdfs; int rc; - INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list); - rc = dfs_mount_share(&mnt_ctx, &isdfs); if (rc) goto error; @@ -3636,7 +3635,6 @@ out: return rc; error: - dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list); cifs_mount_put_conns(&mnt_ctx); return rc; } @@ -3651,6 +3649,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; rc = cifs_mount_get_tcon(&mnt_ctx); + if (!rc) { + /* + * Prevent superblock from being created with any missing + * connections. + */ + if (WARN_ON(!mnt_ctx.server)) + rc = -EHOSTDOWN; + else if (WARN_ON(!mnt_ctx.ses)) + rc = -EACCES; + else if (WARN_ON(!mnt_ctx.tcon)) + rc = -ENOENT; + } if (rc) goto error; @@ -3988,13 +3998,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses) } static struct cifs_tcon * -cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) +__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) { int rc; struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); struct cifs_ses *ses; struct cifs_tcon *tcon = NULL; struct smb3_fs_context *ctx; + char *origin_fullpath = NULL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx == NULL) @@ -4018,6 +4029,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ctx->sign = master_tcon->ses->sign; ctx->seal = master_tcon->seal; ctx->witness = master_tcon->use_witness; + ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses; rc = cifs_set_vol_auth(ctx, master_tcon->ses); if (rc) { @@ -4037,12 +4049,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) goto out; } +#ifdef CONFIG_CIFS_DFS_UPCALL + spin_lock(&master_tcon->tc_lock); + if (master_tcon->origin_fullpath) { + spin_unlock(&master_tcon->tc_lock); + origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source); + if (IS_ERR(origin_fullpath)) { + tcon = ERR_CAST(origin_fullpath); + origin_fullpath = NULL; + cifs_put_smb_ses(ses); + goto out; + } + } else { + spin_unlock(&master_tcon->tc_lock); + } +#endif + tcon = cifs_get_tcon(ses, ctx); if (IS_ERR(tcon)) { cifs_put_smb_ses(ses); goto out; } +#ifdef CONFIG_CIFS_DFS_UPCALL + if (origin_fullpath) { + spin_lock(&tcon->tc_lock); + tcon->origin_fullpath = origin_fullpath; + spin_unlock(&tcon->tc_lock); + origin_fullpath = NULL; + queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, + dfs_cache_get_ttl() * HZ); + } +#endif + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (cap_unix(ses)) reset_cifs_unix_caps(0, tcon, NULL, ctx); @@ -4051,11 +4090,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) out: kfree(ctx->username); kfree_sensitive(ctx->password); + kfree(origin_fullpath); kfree(ctx); return tcon; } +static struct cifs_tcon * +cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) +{ + struct cifs_tcon *ret; + + cifs_mount_lock(); + ret = __cifs_construct_tcon(cifs_sb, fsuid); + cifs_mount_unlock(); + return ret; +} + struct cifs_tcon * cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) { diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 449c59830039..3ec965547e3d 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -66,33 +66,20 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) } /* - * Track individual DFS referral servers used by new DFS mount. - * - * On success, their lifetime will be shared by final tcon (dfs_ses_list). - * Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount(). + * Get an active reference of @ses so that next call to cifs_put_tcon() won't + * release it as any new DFS referrals must go through its IPC tcon. */ -static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx) +static void add_root_smb_session(struct cifs_mount_ctx *mnt_ctx) { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - struct dfs_root_ses *root_ses; struct cifs_ses *ses = mnt_ctx->ses; if (ses) { - root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL); - if (!root_ses) - return -ENOMEM; - - INIT_LIST_HEAD(&root_ses->list); - spin_lock(&cifs_tcp_ses_lock); cifs_smb_ses_inc_refcount(ses); spin_unlock(&cifs_tcp_ses_lock); - root_ses->ses = ses; - list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list); } - /* Select new DFS referral server so that new referrals go through it */ ctx->dfs_root_ses = ses; - return 0; } static inline int parse_dfs_target(struct smb3_fs_context *ctx, @@ -185,11 +172,8 @@ again: continue; } - if (is_refsrv) { - rc = add_root_smb_session(mnt_ctx); - if (rc) - goto out; - } + if (is_refsrv) + add_root_smb_session(mnt_ctx); rc = ref_walk_advance(rw); if (!rc) { @@ -232,6 +216,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_tcon *tcon; char *origin_fullpath; + bool new_tcon = true; int rc; origin_fullpath = dfs_get_path(cifs_sb, ctx->source); @@ -239,6 +224,18 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) return PTR_ERR(origin_fullpath); rc = dfs_referral_walk(mnt_ctx); + if (!rc) { + /* + * Prevent superblock from being created with any missing + * connections. + */ + if (WARN_ON(!mnt_ctx->server)) + rc = -EHOSTDOWN; + else if (WARN_ON(!mnt_ctx->ses)) + rc = -EACCES; + else if (WARN_ON(!mnt_ctx->tcon)) + rc = -ENOENT; + } if (rc) goto out; @@ -247,15 +244,14 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) if (!tcon->origin_fullpath) { tcon->origin_fullpath = origin_fullpath; origin_fullpath = NULL; + } else { + new_tcon = false; } spin_unlock(&tcon->tc_lock); - if (list_empty(&tcon->dfs_ses_list)) { - list_replace_init(&mnt_ctx->dfs_ses_list, &tcon->dfs_ses_list); + if (new_tcon) { queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, dfs_cache_get_ttl() * HZ); - } else { - dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list); } out: @@ -298,7 +294,6 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) if (rc) return rc; - ctx->dfs_root_ses = mnt_ctx->ses; /* * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally * try to get an DFS referral (even cached) to determine whether it is an DFS mount. @@ -324,7 +319,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) *isdfs = true; add_root_smb_session(mnt_ctx); - return __dfs_mount_share(mnt_ctx); + rc = __dfs_mount_share(mnt_ctx); + dfs_put_root_smb_sessions(mnt_ctx); + return rc; } /* Update dfs referral path of superblock */ diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h index 875ab7ae57fc..e5c4dcf83750 100644 --- a/fs/smb/client/dfs.h +++ b/fs/smb/client/dfs.h @@ -7,7 +7,9 @@ #define _CIFS_DFS_H #include "cifsglob.h" +#include "cifsproto.h" #include "fs_context.h" +#include "dfs_cache.h" #include "cifs_unicode.h" #include @@ -114,11 +116,6 @@ static inline void ref_walk_set_tgt_hint(struct dfs_ref_walk *rw) ref_walk_tit(rw)); } -struct dfs_root_ses { - struct list_head list; - struct cifs_ses *ses; -}; - int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, struct smb3_fs_context *ctx); int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs); @@ -133,20 +130,32 @@ static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *p { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; + struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses; - return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls, + return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls, cifs_remap(cifs_sb), path, ref, tl); } -static inline void dfs_put_root_smb_sessions(struct list_head *head) +/* + * cifs_get_smb_ses() already guarantees an active reference of + * @ses->dfs_root_ses when a new session is created, so we need to put extra + * references of all DFS root sessions that were used across the mount process + * in dfs_mount_share(). + */ +static inline void dfs_put_root_smb_sessions(struct cifs_mount_ctx *mnt_ctx) { - struct dfs_root_ses *root, *tmp; + const struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct cifs_ses *ses = ctx->dfs_root_ses; + struct cifs_ses *cur; - list_for_each_entry_safe(root, tmp, head, list) { - list_del_init(&root->list); - cifs_put_smb_ses(root->ses); - kfree(root); + if (!ses) + return; + + for (cur = ses; cur; cur = cur->dfs_root_ses) { + if (cur->dfs_root_ses) + cifs_put_smb_ses(cur->dfs_root_ses); } + cifs_put_smb_ses(ses); } #endif /* _CIFS_DFS_H */ diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 508d831fabe3..11c8efecf7aa 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1172,8 +1172,8 @@ static bool is_ses_good(struct cifs_ses *ses) return ret; } -/* Refresh dfs referral of tcon and mark it for reconnect if needed */ -static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_refresh) +/* Refresh dfs referral of @ses and mark it for reconnect if needed */ +static void __refresh_ses_referral(struct cifs_ses *ses, bool force_refresh) { struct TCP_Server_Info *server = ses->server; DFS_CACHE_TGT_LIST(old_tl); @@ -1181,10 +1181,21 @@ static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_ref bool needs_refresh = false; struct cache_entry *ce; unsigned int xid; + char *path = NULL; int rc = 0; xid = get_xid(); + mutex_lock(&server->refpath_lock); + if (server->leaf_fullpath) { + path = kstrdup(server->leaf_fullpath + 1, GFP_ATOMIC); + if (!path) + rc = -ENOMEM; + } + mutex_unlock(&server->refpath_lock); + if (!path) + goto out; + down_read(&htable_rw_lock); ce = lookup_cache_entry(path); needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); @@ -1218,19 +1229,17 @@ out: free_xid(xid); dfs_cache_free_tgts(&old_tl); dfs_cache_free_tgts(&new_tl); - return rc; + kfree(path); } -static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh) +static inline void refresh_ses_referral(struct cifs_ses *ses) { - struct TCP_Server_Info *server = tcon->ses->server; - struct cifs_ses *ses = tcon->ses; + __refresh_ses_referral(ses, false); +} - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, force_refresh); - mutex_unlock(&server->refpath_lock); - return 0; +static inline void force_refresh_ses_referral(struct cifs_ses *ses) +{ + __refresh_ses_referral(ses, true); } /** @@ -1271,34 +1280,20 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) */ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; - return refresh_tcon(tcon, true); + force_refresh_ses_referral(tcon->ses); + return 0; } /* Refresh all DFS referrals related to DFS tcon */ void dfs_cache_refresh(struct work_struct *work) { - struct TCP_Server_Info *server; - struct dfs_root_ses *rses; struct cifs_tcon *tcon; struct cifs_ses *ses; tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work); - ses = tcon->ses; - server = ses->server; - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, false); - mutex_unlock(&server->refpath_lock); - - list_for_each_entry(rses, &tcon->dfs_ses_list, list) { - ses = rses->ses; - server = ses->server; - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, false); - mutex_unlock(&server->refpath_lock); - } + for (ses = tcon->ses; ses; ses = ses->dfs_root_ses) + refresh_ses_referral(ses); queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, atomic_read(&dfs_cache_ttl) * HZ); diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index 89333d9bce36..864b194dbaa0 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; *oplock = 0; if (tcon->ses->server->oplocks) @@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned return PTR_ERR(full_path); } + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & @@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned desired_access |= GENERIC_READ; /* is this too little? */ if (OPEN_FMODE(oflags) & FMODE_WRITE) desired_access |= GENERIC_WRITE; + if (rdwr_for_fscache == 1) + desired_access |= GENERIC_READ; disposition = FILE_OVERWRITE_IF; if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned rc = server->ops->open(xid, &oparms, oplock, buf); if (rc) { cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access &= ~GENERIC_READ; + rdwr_for_fscache = 2; + goto retry_open; + } goto out; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* @@ -612,11 +627,18 @@ int cifs_mknod(struct mnt_idmap *idmap, struct inode *inode, goto mknod_out; } + trace_smb3_mknod_enter(xid, tcon->ses->Suid, tcon->tid, full_path); + rc = tcon->ses->server->ops->make_node(xid, inode, direntry, tcon, full_path, mode, device_number); mknod_out: + if (rc) + trace_smb3_mknod_err(xid, tcon->ses->Suid, tcon->tid, rc); + else + trace_smb3_mknod_done(xid, tcon->ses->Suid, tcon->tid); + free_dentry_path(page); free_xid(xid); cifs_put_tlink(tlink); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 16aadce492b2..9be37d0fe724 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) */ } -static inline int cifs_convert_flags(unsigned int flags) +static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache) { if ((flags & O_ACCMODE) == O_RDONLY) return GENERIC_READ; else if ((flags & O_ACCMODE) == O_WRONLY) - return GENERIC_WRITE; + return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; else if ((flags & O_ACCMODE) == O_RDWR) { /* GENERIC_ALL is too much permission to request can cause unnecessary access denied on create */ @@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ int create_options = CREATE_NOT_DIR; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; if (!server->ops->open) return -ENOSYS; - desired_access = cifs_convert_flags(f_flags); + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); /********************************************************************* * open flag mapping table: @@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ if (f_flags & O_DIRECT) create_options |= CREATE_NO_BUFFER; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ }; rc = server->ops->open(xid, &oparms, oplock, buf); - if (rc) + if (rc) { + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } return rc; + } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); /* TODO: Add support for calling posix query info but with passing in fid */ if (tcon->unix_ext) @@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem) } static void cifsFileInfo_put_work(struct work_struct *work); +void serverclose_work(struct work_struct *work); struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock, @@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); INIT_WORK(&cfile->put, cifsFileInfo_put_work); + INIT_WORK(&cfile->serverclose, serverclose_work); INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); mutex_init(&cfile->fh_mutex); spin_lock_init(&cfile->file_info_lock); @@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) cifsFileInfo_put_final(cifs_file); } +void serverclose_work(struct work_struct *work) +{ + struct cifsFileInfo *cifs_file = container_of(work, + struct cifsFileInfo, serverclose); + + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); + + struct TCP_Server_Info *server = tcon->ses->server; + int rc = 0; + int retries = 0; + int MAX_RETRIES = 4; + + do { + if (server->ops->close_getattr) + rc = server->ops->close_getattr(0, tcon, cifs_file); + else if (server->ops->close) + rc = server->ops->close(0, tcon, &cifs_file->fid); + + if (rc == -EBUSY || rc == -EAGAIN) { + retries++; + msleep(250); + } + } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) + ); + + if (retries == MAX_RETRIES) + pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); + + if (cifs_file->offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); +} + /** * cifsFileInfo_put - release a reference of file priv data * @@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, struct cifs_fid fid = {}; struct cifs_pending_open open; bool oplock_break_cancelled; + bool serverclose_offloaded = false; spin_lock(&tcon->open_file_lock); spin_lock(&cifsi->open_file_lock); spin_lock(&cifs_file->file_info_lock); + + cifs_file->offload = offload; if (--cifs_file->count > 0) { spin_unlock(&cifs_file->file_info_lock); spin_unlock(&cifsi->open_file_lock); @@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; + int rc = 0; xid = get_xid(); if (server->ops->close_getattr) - server->ops->close_getattr(xid, tcon, cifs_file); + rc = server->ops->close_getattr(xid, tcon, cifs_file); else if (server->ops->close) - server->ops->close(xid, tcon, &cifs_file->fid); + rc = server->ops->close(xid, tcon, &cifs_file->fid); _free_xid(xid); + + if (rc == -EBUSY || rc == -EAGAIN) { + // Server close failed, hence offloading it as an async op + queue_work(serverclose_wq, &cifs_file->serverclose); + serverclose_offloaded = true; + } } if (oplock_break_cancelled) @@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, cifs_del_pending_open(&open); - if (offload) - queue_work(fileinfo_put_wq, &cifs_file->put); - else - cifsFileInfo_put_final(cifs_file); + // if serverclose has been offloaded to wq (on failure), it will + // handle offloading put as well. If serverclose not offloaded, + // we need to handle offloading put here. + if (!serverclose_offloaded) { + if (offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); + } } int cifs_open(struct inode *inode, struct file *file) @@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file) use_cache: fscache_use_cookie(cifs_inode_cookie(file_inode(file)), file->f_mode & FMODE_WRITE); - if (file->f_flags & O_DIRECT && - (!((file->f_flags & O_ACCMODE) != O_RDONLY) || - file->f_flags & O_APPEND)) - cifs_invalidate_cache(file_inode(file), - FSCACHE_INVAL_DIO_WRITE); + if (!(file->f_flags & O_DIRECT)) + goto out; + if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) + goto out; + cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); out: free_dentry_path(page); @@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; xid = get_xid(); mutex_lock(&cfile->fh_mutex); @@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ - desired_access = cifs_convert_flags(cfile->f_flags); + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) @@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) /* indicate that we need to relock the file */ oparms.reconnect = true; } + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(cfile->f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } if (rc) { mutex_unlock(&cfile->fh_mutex); @@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) goto reopen_error_exit; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY reopen_success: #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index bdcbe6ff2739..6c727d8c31e8 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -37,7 +37,7 @@ #include "rfc1002pdu.h" #include "fs_context.h" -static DEFINE_MUTEX(cifs_mount_mutex); +DEFINE_MUTEX(cifs_mount_mutex); static const match_table_t cifs_smb_version_tokens = { { Smb_1, SMB1_VERSION_STRING }, @@ -162,6 +162,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_string("username", Opt_user), fsparam_string("pass", Opt_pass), fsparam_string("password", Opt_pass), + fsparam_string("password2", Opt_pass2), fsparam_string("ip", Opt_ip), fsparam_string("addr", Opt_ip), fsparam_string("domain", Opt_domain), @@ -345,6 +346,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx new_ctx->nodename = NULL; new_ctx->username = NULL; new_ctx->password = NULL; + new_ctx->password2 = NULL; new_ctx->server_hostname = NULL; new_ctx->domainname = NULL; new_ctx->UNC = NULL; @@ -357,6 +359,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(prepath); DUP_CTX_STR(username); DUP_CTX_STR(password); + DUP_CTX_STR(password2); DUP_CTX_STR(server_hostname); DUP_CTX_STR(UNC); DUP_CTX_STR(source); @@ -783,9 +786,9 @@ static int smb3_get_tree(struct fs_context *fc) if (err) return err; - mutex_lock(&cifs_mount_mutex); + cifs_mount_lock(); ret = smb3_get_tree_common(fc); - mutex_unlock(&cifs_mount_mutex); + cifs_mount_unlock(); return ret; } @@ -905,6 +908,8 @@ static int smb3_reconfigure(struct fs_context *fc) else { kfree_sensitive(ses->password); ses->password = kstrdup(ctx->password, GFP_KERNEL); + kfree_sensitive(ses->password2); + ses->password2 = kstrdup(ctx->password2, GFP_KERNEL); } STEAL_STRING(cifs_sb, ctx, domainname); STEAL_STRING(cifs_sb, ctx, nodename); @@ -1305,6 +1310,18 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } break; + case Opt_pass2: + kfree_sensitive(ctx->password2); + ctx->password2 = NULL; + if (strlen(param->string) == 0) + break; + + ctx->password2 = kstrdup(param->string, GFP_KERNEL); + if (ctx->password2 == NULL) { + cifs_errorf(fc, "OOM when copying password2 string\n"); + goto cifs_parse_mount_err; + } + break; case Opt_ip: if (strlen(param->string) == 0) { ctx->got_ip = false; @@ -1608,6 +1625,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_parse_mount_err: kfree_sensitive(ctx->password); ctx->password = NULL; + kfree_sensitive(ctx->password2); + ctx->password2 = NULL; return -EINVAL; } @@ -1713,6 +1732,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) ctx->username = NULL; kfree_sensitive(ctx->password); ctx->password = NULL; + kfree_sensitive(ctx->password2); + ctx->password2 = NULL; kfree(ctx->server_hostname); ctx->server_hostname = NULL; kfree(ctx->UNC); diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 7863f2248c4d..a947bddeba27 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -145,6 +145,7 @@ enum cifs_param { Opt_source, Opt_user, Opt_pass, + Opt_pass2, Opt_ip, Opt_domain, Opt_srcaddr, @@ -177,6 +178,7 @@ struct smb3_fs_context { char *username; char *password; + char *password2; char *domainname; char *source; char *server_hostname; @@ -304,4 +306,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); #define MAX_CACHED_FIDS 16 extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp); +extern struct mutex cifs_mount_mutex; + +static inline void cifs_mount_lock(void) +{ + mutex_lock(&cifs_mount_mutex); +} + +static inline void cifs_mount_unlock(void) +{ + mutex_unlock(&cifs_mount_mutex); +} + #endif diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index c4a3cb736881..340efce8f052 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -12,6 +12,16 @@ #include "cifs_fs_sb.h" #include "cifsproto.h" +/* + * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode(). + */ +struct cifs_fscache_inode_key { + + __le64 uniqueid; /* server inode number */ + __le64 createtime; /* creation time on server */ + u8 type; /* S_IFMT file type */ +} __packed; + static void cifs_fscache_fill_volume_coherency( struct cifs_tcon *tcon, struct cifs_fscache_volume_coherency_data *cd) @@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) void cifs_fscache_get_inode_cookie(struct inode *inode) { struct cifs_fscache_inode_coherency_data cd; + struct cifs_fscache_inode_key key; struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + key.uniqueid = cpu_to_le64(cifsi->uniqueid); + key.createtime = cpu_to_le64(cifsi->createtime); + key.type = (inode->i_mode & S_IFMT) >> 12; cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd); cifsi->netfs.cache = fscache_acquire_cookie(tcon->fscache, 0, - &cifsi->uniqueid, sizeof(cifsi->uniqueid), + &key, sizeof(key), &cd, sizeof(cd), i_size_read(&cifsi->netfs.inode)); if (cifsi->netfs.cache) diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h index a3d73720914f..1f2ea9f5cc9a 100644 --- a/fs/smb/client/fscache.h +++ b/fs/smb/client/fscache.h @@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode, __cifs_readahead_to_fscache(inode, pos, len); } +static inline bool cifs_fscache_enabled(struct inode *inode) +{ + return fscache_cookie_enabled(cifs_inode_cookie(inode)); +} + #else /* CONFIG_CIFS_FSCACHE */ static inline void cifs_fscache_fill_coherency(struct inode *inode, @@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {} static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} +static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } static inline int cifs_fscache_query_occupancy(struct inode *inode, pgoff_t first, unsigned int nr_pages, diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index d28ab0af6049..60afab5c83d4 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1105,7 +1105,8 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, } else { cifs_open_info_to_fattr(fattr, data, sb); } - if (!rc && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING) + if (!rc && *inode && + (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)) cifs_mark_open_handles_for_deleted_file(*inode, full_path); break; case -EREMOTE: @@ -1351,6 +1352,8 @@ cifs_find_inode(struct inode *inode, void *opaque) { struct cifs_fattr *fattr = opaque; + /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */ + /* don't match inode with different uniqueid */ if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index c012dfdba80d..855ac5a62edf 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -247,7 +247,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) { list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) { - if (ses_it->Suid == out.session_id) { + spin_lock(&ses_it->ses_lock); + if (ses_it->ses_status != SES_EXITING && + ses_it->Suid == out.session_id) { ses = ses_it; /* * since we are using the session outside the crit @@ -255,9 +257,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug * so increment its refcount */ cifs_smb_ses_inc_refcount(ses); + spin_unlock(&ses_it->ses_lock); found = true; goto search_end; } + spin_unlock(&ses_it->ses_lock); } } search_end: diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index c3771fc81328..7d15a1969b81 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -98,6 +98,7 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->serverDomain); kfree(buf_to_free->serverNOS); kfree_sensitive(buf_to_free->password); + kfree_sensitive(buf_to_free->password2); kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); kfree_sensitive(buf_to_free->auth_key.response); @@ -138,9 +139,6 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); ret_buf->stats_from_time = ktime_get_real_seconds(); -#ifdef CONFIG_CIFS_DFS_UPCALL - INIT_LIST_HEAD(&ret_buf->dfs_ses_list); -#endif return ret_buf; } @@ -156,9 +154,6 @@ tconInfoFree(struct cifs_tcon *tcon) atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); kfree_sensitive(tcon->password); -#ifdef CONFIG_CIFS_DFS_UPCALL - dfs_put_root_smb_sessions(&tcon->dfs_ses_list); -#endif kfree(tcon->origin_fullpath); kfree(tcon); } @@ -487,6 +482,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != buf->Tid) continue; diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index a9eaba8083b0..212ec6f66ec6 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } -static void +static int cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - CIFSSMBClose(xid, tcon, fid->netfid); + return CIFSSMBClose(xid, tcon, fid->netfid); } static int diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 82b84a4941dd..cc72be5a93a9 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); cifs_stats_inc( @@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 2ed456948f34..78c94d0350fe 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1412,14 +1412,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) memcpy(cfile->fid.create_guid, fid->create_guid, 16); } -static void +static int smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } -static void +static int smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile) { @@ -1430,7 +1430,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, &file_inf); if (rc) - return; + return rc; inode = d_inode(cfile->dentry); @@ -1459,6 +1459,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, /* End of file and Attributes should not have to be updated on close */ spin_unlock(&inode->i_lock); + return rc; } static int @@ -2480,6 +2481,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { spin_lock(&tcon->tc_lock); @@ -3913,7 +3916,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, strcat(message, "W"); } if (!new_oplock) - strncpy(message, "None", sizeof(message)); + strscpy(message, "None"); cinode->oplock = new_oplock; cifs_dbg(FYI, "%s Lease granted on inode %p\n", message, @@ -4961,68 +4964,84 @@ static int smb2_next_header(struct TCP_Server_Info *server, char *buf, return 0; } -int cifs_sfu_make_node(unsigned int xid, struct inode *inode, - struct dentry *dentry, struct cifs_tcon *tcon, - const char *full_path, umode_t mode, dev_t dev) +static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev) { - struct cifs_open_info_data buf = {}; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; struct cifs_io_parms io_parms = {}; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_fid fid; unsigned int bytes_written; - struct win_dev *pdev; + struct win_dev pdev = {}; struct kvec iov[2]; __u32 oplock = server->oplocks ? REQ_OPLOCK : 0; int rc; - if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode)) + switch (mode & S_IFMT) { + case S_IFCHR: + strscpy(pdev.type, "IntxCHR"); + pdev.major = cpu_to_le64(MAJOR(dev)); + pdev.minor = cpu_to_le64(MINOR(dev)); + break; + case S_IFBLK: + strscpy(pdev.type, "IntxBLK"); + pdev.major = cpu_to_le64(MAJOR(dev)); + pdev.minor = cpu_to_le64(MINOR(dev)); + break; + case S_IFIFO: + strscpy(pdev.type, "LnxFIFO"); + break; + default: return -EPERM; + } - oparms = (struct cifs_open_parms) { - .tcon = tcon, - .cifs_sb = cifs_sb, - .desired_access = GENERIC_WRITE, - .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | - CREATE_OPTION_SPECIAL), - .disposition = FILE_CREATE, - .path = full_path, - .fid = &fid, - }; + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, GENERIC_WRITE, + FILE_CREATE, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL, ACL_NO_MODE); + oparms.fid = &fid; - rc = server->ops->open(xid, &oparms, &oplock, &buf); + rc = server->ops->open(xid, &oparms, &oplock, NULL); if (rc) return rc; - /* - * BB Do not bother to decode buf since no local inode yet to put - * timestamps in, but we can reuse it safely. - */ - pdev = (struct win_dev *)&buf.fi; io_parms.pid = current->tgid; io_parms.tcon = tcon; - io_parms.length = sizeof(*pdev); - iov[1].iov_base = pdev; - iov[1].iov_len = sizeof(*pdev); - if (S_ISCHR(mode)) { - memcpy(pdev->type, "IntxCHR", 8); - pdev->major = cpu_to_le64(MAJOR(dev)); - pdev->minor = cpu_to_le64(MINOR(dev)); - } else if (S_ISBLK(mode)) { - memcpy(pdev->type, "IntxBLK", 8); - pdev->major = cpu_to_le64(MAJOR(dev)); - pdev->minor = cpu_to_le64(MINOR(dev)); - } else if (S_ISFIFO(mode)) { - memcpy(pdev->type, "LnxFIFO", 8); - } + io_parms.length = sizeof(pdev); + iov[1].iov_base = &pdev; + iov[1].iov_len = sizeof(pdev); rc = server->ops->sync_write(xid, &fid, &io_parms, &bytes_written, iov, 1); server->ops->close(xid, tcon, &fid); - d_drop(dentry); - /* FIXME: add code here to set EAs */ - cifs_free_open_info(&buf); + return rc; +} + +int cifs_sfu_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev) +{ + struct inode *new = NULL; + int rc; + + rc = __cifs_sfu_make_node(xid, inode, dentry, tcon, + full_path, mode, dev); + if (rc) + return rc; + + if (tcon->posix_extensions) { + rc = smb311_posix_get_inode_info(&new, full_path, NULL, + inode->i_sb, xid); + } else if (tcon->unix_ext) { + rc = cifs_get_inode_info_unix(&new, full_path, + inode->i_sb, xid); + } else { + rc = cifs_get_inode_info(&new, full_path, NULL, + inode->i_sb, xid, NULL); + } + if (!rc) + d_instantiate(dentry, new); return rc; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 3ea688558e6c..86c647a947cc 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -367,6 +367,17 @@ again: } rc = cifs_setup_session(0, ses, server, nls_codepage); + if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) { + /* + * Try alternate password for next reconnect (key rotation + * could be enabled on the server e.g.) if an alternate + * password is available and the current password is expired, + * but do not swap on non pwd related errors like host down + */ + if (ses->password2) + swap(ses->password2, ses->password); + } + if ((rc == -EACCES) && !tcon->retry) { mutex_unlock(&ses->session_mutex); rc = -EHOSTDOWN; @@ -3628,9 +3639,9 @@ replay_again: memcpy(&pbuf->network_open_info, &rsp->network_open_info, sizeof(pbuf->network_open_info)); + atomic_dec(&tcon->num_remote_opens); } - atomic_dec(&tcon->num_remote_opens); close_exit: SMB2_close_free(&rqst); free_rsp_buf(resp_buftype, rsp); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 5a3ca62d2f07..1d6e54f7879e 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -659,7 +659,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) } spin_unlock(&server->srv_lock); if (!is_binding && !server->session_estab) { - strncpy(shdr->Signature, "BSRSPYL", 8); + strscpy(shdr->Signature, "BSRSPYL"); return 0; } diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index f9c1fd32d0b8..5e83cb9da902 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -375,6 +375,7 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(get_reparse_compound_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter); +DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter); DECLARE_EVENT_CLASS(smb3_inf_compound_done_class, TP_PROTO(unsigned int xid, @@ -415,7 +416,7 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_wsl_ea_compound_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done); - +DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done); DECLARE_EVENT_CLASS(smb3_inf_compound_err_class, TP_PROTO(unsigned int xid, @@ -461,6 +462,7 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_wsl_ea_compound_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err); +DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err); /* * For logging SMB3 Status code and Command for responses which return errors diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 8ca8a45c4c62..686b321c5a8b 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -167,7 +167,8 @@ struct ksmbd_share_config_response { __u16 force_uid; __u16 force_gid; __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; - __u32 reserved[112]; /* Reserved room */ + __u32 reserved[111]; /* Reserved room */ + __u32 payload_sz; __u32 veto_list_sz; __s8 ____payload[]; }; diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index 328a412259dc..a2f0a2edceb8 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, share->name = kstrdup(name, GFP_KERNEL); if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) { - share->path = kstrdup(ksmbd_share_config_path(resp), + int path_len = PATH_MAX; + + if (resp->payload_sz) + path_len = resp->payload_sz - resp->veto_list_sz; + + share->path = kstrndup(ksmbd_share_config_path(resp), path_len, GFP_KERNEL); if (share->path) share->path_sz = strlen(share->path); diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c index a45f7dca482e..606aa3c5189a 100644 --- a/fs/smb/server/smb2ops.c +++ b/fs/smb/server/smb2ops.c @@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || + (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; } @@ -278,11 +283,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_DIRECTORY_LEASING; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || - (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && - conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index d478fa0c57ab..5723bbf372d7 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5857,8 +5857,9 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; - smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); + if (!rc) + smb_break_all_levII_oplock(work, fp, 0); out: kfree(new_name); return rc; diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index f29bb03f0dc4..8752ac82c557 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -65,6 +65,7 @@ struct ipc_msg_table_entry { struct hlist_node ipc_table_hlist; void *response; + unsigned int msg_sz; }; static struct delayed_work ipc_timer_work; @@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz) } memcpy(entry->response, payload, sz); + entry->msg_sz = sz; wake_up_interruptible(&entry->wait); ret = 0; break; @@ -453,6 +455,34 @@ out: return ret; } +static int ipc_validate_msg(struct ipc_msg_table_entry *entry) +{ + unsigned int msg_sz = entry->msg_sz; + + if (entry->type == KSMBD_EVENT_RPC_REQUEST) { + struct ksmbd_rpc_command *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; + } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) { + struct ksmbd_spnego_authen_response *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_spnego_authen_response) + + resp->session_key_len + resp->spnego_blob_len; + } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) { + struct ksmbd_share_config_response *resp = entry->response; + + if (resp->payload_sz) { + if (resp->payload_sz < resp->veto_list_sz) + return -EINVAL; + + msg_sz = sizeof(struct ksmbd_share_config_response) + + resp->payload_sz; + } + } + + return entry->msg_sz != msg_sz ? -EINVAL : 0; +} + static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) { struct ipc_msg_table_entry entry; @@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle ret = wait_event_interruptible_timeout(entry.wait, entry.response != NULL, IPC_WAIT_TIMEOUT); + if (entry.response) { + ret = ipc_validate_msg(&entry); + if (ret) { + kvfree(entry.response); + entry.response = NULL; + } + } out: down_write(&ipc_msg_table_lock); hash_del(&entry.ipc_table_hlist); diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index aa3411354e66..16bd693d0b3a 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode, gid_t i_gid; int err; + inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); + if (inode->i_ino == 0) + return -EINVAL; + err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid); if (err) return err; @@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode, i_uid_write(inode, i_uid); i_gid_write(inode, i_gid); - inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0); inode_set_atime(inode, inode_get_mtime_sec(inode), 0); inode_set_ctime(inode, inode_get_mtime_sec(inode), 0); diff --git a/fs/super.c b/fs/super.c index 71d9779c42b1..69ce6c600968 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1515,29 +1515,11 @@ static int fs_bdev_thaw(struct block_device *bdev) return error; } -static void fs_bdev_super_get(void *data) -{ - struct super_block *sb = data; - - spin_lock(&sb_lock); - sb->s_count++; - spin_unlock(&sb_lock); -} - -static void fs_bdev_super_put(void *data) -{ - struct super_block *sb = data; - - put_super(sb); -} - const struct blk_holder_ops fs_holder_ops = { .mark_dead = fs_bdev_mark_dead, .sync = fs_bdev_sync, .freeze = fs_bdev_freeze, .thaw = fs_bdev_thaw, - .get_holder = fs_bdev_super_get, - .put_holder = fs_bdev_super_put, }; EXPORT_SYMBOL_GPL(fs_holder_ops); @@ -1562,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, * writable from userspace even for a read-only block device. */ if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { - fput(bdev_file); + bdev_fput(bdev_file); return -EACCES; } @@ -1573,7 +1555,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { if (fc) warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - fput(bdev_file); + bdev_fput(bdev_file); return -EBUSY; } spin_lock(&sb_lock); @@ -1693,7 +1675,7 @@ void kill_block_super(struct super_block *sb) generic_shutdown_super(sb); if (bdev) { sync_blockdev(bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } } diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 289b57d32c89..d1995e2d6c94 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -463,6 +463,8 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, kn = kernfs_find_and_get(kobj->sd, attr->name); if (kn) kernfs_break_active_protection(kn); + else + kobject_put(kobj); return kn; } EXPORT_SYMBOL_GPL(sysfs_break_active_protection); diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index dc067eeb6387..894c6ca1e500 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -336,6 +336,7 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, /** * lookup_file - look up a file in the tracefs filesystem + * @parent_ei: Pointer to the eventfs_inode that represents parent of the file * @dentry: the dentry to look up * @mode: the permission that the file should have. * @attr: saved attributes changed by user @@ -389,6 +390,7 @@ static struct dentry *lookup_file(struct eventfs_inode *parent_ei, /** * lookup_dir_entry - look up a dir in the tracefs filesystem * @dentry: the directory to look up + * @pei: Pointer to the parent eventfs_inode if available * @ei: the eventfs_inode that represents the directory to create * * This function will look up a dentry for a directory represented by @@ -478,16 +480,20 @@ void eventfs_d_release(struct dentry *dentry) /** * lookup_file_dentry - create a dentry for a file of an eventfs_inode + * @dentry: The parent dentry under which the new file's dentry will be created * @ei: the eventfs_inode that the file will be created under * @idx: the index into the entry_attrs[] of the @ei - * @parent: The parent dentry of the created file. - * @name: The name of the file to create * @mode: The mode of the file. * @data: The data to use to set the inode of the file with on open() * @fops: The fops of the file to be created. * - * Create a dentry for a file of an eventfs_inode @ei and place it into the - * address located at @e_dentry. + * This function creates a dentry for a file associated with an + * eventfs_inode @ei. It uses the entry attributes specified by @idx, + * if available. The file will have the specified @mode and its inode will be + * set up with @data upon open. The file operations will be set to @fops. + * + * Return: Returns a pointer to the newly created file's dentry or an error + * pointer. */ static struct dentry * lookup_file_dentry(struct dentry *dentry, diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c index 2307f8037efc..118dedef8ebe 100644 --- a/fs/vboxsf/file.c +++ b/fs/vboxsf/file.c @@ -218,6 +218,7 @@ const struct file_operations vboxsf_reg_fops = { .release = vboxsf_file_release, .fsync = noop_fsync, .splice_read = filemap_splice_read, + .setlease = simple_nosetlease, }; const struct inode_operations vboxsf_reg_iops = { diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index cabe8ac4fefc..ffb1d565da39 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -151,11 +151,11 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) if (!sbi->nls) { vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); err = -EINVAL; - goto fail_free; + goto fail_destroy_idr; } } - sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL); + sbi->bdi_id = ida_alloc(&vboxsf_bdi_ida, GFP_KERNEL); if (sbi->bdi_id < 0) { err = sbi->bdi_id; goto fail_free; @@ -221,9 +221,10 @@ fail_unmap: vboxsf_unmap_folder(sbi->root); fail_free: if (sbi->bdi_id >= 0) - ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + ida_free(&vboxsf_bdi_ida, sbi->bdi_id); if (sbi->nls) unload_nls(sbi->nls); +fail_destroy_idr: idr_destroy(&sbi->ino_idr); kfree(sbi); return err; @@ -268,7 +269,7 @@ static void vboxsf_put_super(struct super_block *sb) vboxsf_unmap_folder(sbi->root); if (sbi->bdi_id >= 0) - ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + ida_free(&vboxsf_bdi_ida, sbi->bdi_id); if (sbi->nls) unload_nls(sbi->nls); diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c index 72ac9320e6a3..9515bbf0b54c 100644 --- a/fs/vboxsf/utils.c +++ b/fs/vboxsf/utils.c @@ -440,7 +440,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, { const char *in; char *out; - size_t out_len; size_t out_bound_len; size_t in_bound_len; @@ -448,7 +447,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, in_bound_len = utf8_len; out = name; - out_len = 0; /* Reserve space for terminating 0 */ out_bound_len = name_bound_len - 1; @@ -469,7 +467,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, out += nb; out_bound_len -= nb; - out_len += nb; } *out = 0; diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index d991eec05436..73a4b895de67 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -530,7 +530,8 @@ xfs_validate_sb_common( } if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), - XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) + XFS_FSB_TO_B(mp, sbp->sb_width), 0, + xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) return -EFSCORRUPTED; /* @@ -1323,8 +1324,10 @@ xfs_sb_get_secondary( } /* - * sunit, swidth, sectorsize(optional with 0) should be all in bytes, - * so users won't be confused by values in error messages. + * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users + * won't be confused by values in error messages. This function returns false + * if the stripe geometry is invalid and the caller is unable to repair the + * stripe configuration later in the mount process. */ bool xfs_validate_stripe_geometry( @@ -1332,20 +1335,21 @@ xfs_validate_stripe_geometry( __s64 sunit, __s64 swidth, int sectorsize, + bool may_repair, bool silent) { if (swidth > INT_MAX) { if (!silent) xfs_notice(mp, "stripe width (%lld) is too large", swidth); - return false; + goto check_override; } if (sunit > swidth) { if (!silent) xfs_notice(mp, "stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); - return false; + goto check_override; } if (sectorsize && (int)sunit % sectorsize) { @@ -1353,21 +1357,21 @@ xfs_validate_stripe_geometry( xfs_notice(mp, "stripe unit (%lld) must be a multiple of the sector size (%d)", sunit, sectorsize); - return false; + goto check_override; } if (sunit && !swidth) { if (!silent) xfs_notice(mp, "invalid stripe unit (%lld) and stripe width of 0", sunit); - return false; + goto check_override; } if (!sunit && swidth) { if (!silent) xfs_notice(mp, "invalid stripe width (%lld) and stripe unit of 0", swidth); - return false; + goto check_override; } if (sunit && (int)swidth % (int)sunit) { @@ -1375,9 +1379,27 @@ xfs_validate_stripe_geometry( xfs_notice(mp, "stripe width (%lld) must be a multiple of the stripe unit (%lld)", swidth, sunit); - return false; + goto check_override; } return true; + +check_override: + if (!may_repair) + return false; + /* + * During mount, mp->m_dalign will not be set unless the sunit mount + * option was set. If it was set, ignore the bad stripe alignment values + * and allow the validation and overwrite later in the mount process to + * attempt to overwrite the bad stripe alignment values with the values + * supplied by mount options. + */ + if (!mp->m_dalign) + return false; + if (!silent) + xfs_notice(mp, +"Will try to correct with specified mount options sunit (%d) and swidth (%d)", + BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); + return true; } /* diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 2e8e8d63d4eb..37b1ed1bc209 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -35,8 +35,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); -extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, - __s64 sunit, __s64 swidth, int sectorsize, bool silent); +bool xfs_validate_stripe_geometry(struct xfs_mount *mp, + __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, + bool silent); uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index abff79a77c72..47a20cf5205f 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1044,9 +1044,7 @@ xchk_irele( struct xfs_scrub *sc, struct xfs_inode *ip) { - if (current->journal_info != NULL) { - ASSERT(current->journal_info == sc->tp); - + if (sc->tp) { /* * If we are in a transaction, we /cannot/ drop the inode * ourselves, because the VFS will trigger writeback, which diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1698507d1ac7..3f428620ebf2 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -503,13 +503,6 @@ xfs_vm_writepages( { struct xfs_writepage_ctx wpc = { }; - /* - * Writing back data in a transaction context can result in recursive - * transactions. This is bad, so issue a warning and get out of here. - */ - if (WARN_ON_ONCE(current->journal_info)) - return 0; - xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1a18c381127e..f0fa02264eda 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2030,7 +2030,7 @@ xfs_free_buftarg( fs_put_dax(btp->bt_daxdev, btp->bt_mount); /* the main block device is closed by kill_block_super */ if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev) - fput(btp->bt_bdev_file); + bdev_fput(btp->bt_bdev_file); kfree(btp); } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index e64265bc0b33..74f1812b03cb 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -2039,8 +2039,10 @@ xfs_inodegc_want_queue_work( * - Memory shrinkers queued the inactivation worker and it hasn't finished. * - The queue depth exceeds the maximum allowable percpu backlog. * - * Note: If the current thread is running a transaction, we don't ever want to - * wait for other transactions because that could introduce a deadlock. + * Note: If we are in a NOFS context here (e.g. current thread is running a + * transaction) the we don't want to block here as inodegc progress may require + * filesystem resources we hold to make progress and that could result in a + * deadlock. Hence we skip out of here if we are in a scoped NOFS context. */ static inline bool xfs_inodegc_want_flush_work( @@ -2048,7 +2050,7 @@ xfs_inodegc_want_flush_work( unsigned int items, unsigned int shrinker_hits) { - if (current->journal_info) + if (current->flags & PF_MEMALLOC_NOFS) return false; if (shrinker_hits > 0) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ea48774f6b76..d55b42b2480d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1301,8 +1301,19 @@ xfs_link( */ if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) && tdp->i_projid != sip->i_projid)) { - error = -EXDEV; - goto error_return; + /* + * Project quota setup skips special files which can + * leave inodes in a PROJINHERIT directory without a + * project ID set. We need to allow links to be made + * to these "project-less" inodes because userspace + * expects them to succeed after project ID setup, + * but everything else should be rejected. + */ + if (!special_file(VFS_I(sip)->i_mode) || + sip->i_projid != 0) { + error = -EXDEV; + goto error_return; + } } if (!resblks) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c21f10ab0f5d..bce020374c5e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -485,7 +485,7 @@ xfs_open_devices( mp->m_logdev_targp = mp->m_ddev_targp; /* Handle won't be used, drop it */ if (logdev_file) - fput(logdev_file); + bdev_fput(logdev_file); } return 0; @@ -497,10 +497,10 @@ xfs_open_devices( xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: if (rtdev_file) - fput(rtdev_file); + bdev_fput(rtdev_file); out_close_logdev: if (logdev_file) - fput(logdev_file); + bdev_fput(logdev_file); return error; } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 3f7e3a09a49f..1636663707dc 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -268,19 +268,14 @@ static inline void xfs_trans_set_context( struct xfs_trans *tp) { - ASSERT(current->journal_info == NULL); tp->t_pflags = memalloc_nofs_save(); - current->journal_info = tp; } static inline void xfs_trans_clear_context( struct xfs_trans *tp) { - if (current->journal_info == tp) { - memalloc_nofs_restore(tp->t_pflags); - current->journal_info = NULL; - } + memalloc_nofs_restore(tp->t_pflags); } static inline void @@ -288,10 +283,8 @@ xfs_trans_switch_context( struct xfs_trans *old_tp, struct xfs_trans *new_tp) { - ASSERT(current->journal_info == old_tp); new_tp->t_pflags = old_tp->t_pflags; old_tp->t_pflags = 0; - current->journal_info = new_tp; } #endif /* __XFS_TRANS_H__ */ diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index c6a124e8d565..964fa7f24003 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1048,7 +1048,7 @@ static int zonefs_init_zgroup(struct super_block *sb, zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", zonefs_zgroup_name(ztype), zgroup->g_nr_zones, - zgroup->g_nr_zones > 1 ? "s" : ""); + str_plural(zgroup->g_nr_zones)); return 0; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 5de954e2b18a..e7796f373d0d 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -911,17 +911,19 @@ static inline bool acpi_int_uid_match(struct acpi_device *adev, u64 uid2) * acpi_dev_hid_uid_match - Match device by supplied HID and UID * @adev: ACPI device to match. * @hid2: Hardware ID of the device. - * @uid2: Unique ID of the device, pass 0 or NULL to not check _UID. + * @uid2: Unique ID of the device, pass NULL to not check _UID. * * Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2 * will be treated as a match. If user wants to validate @uid2, it should be * done before calling this function. * - * Returns: %true if matches or @uid2 is 0 or NULL, %false otherwise. + * Returns: %true if matches or @uid2 is NULL, %false otherwise. */ #define acpi_dev_hid_uid_match(adev, hid2, uid2) \ (acpi_dev_hid_match(adev, hid2) && \ - (!(uid2) || acpi_dev_uid_match(adev, uid2))) + /* Distinguish integer 0 from NULL @uid2 */ \ + (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \ + acpi_dev_uid_match(adev, uid2))) void acpi_dev_clear_dependencies(struct acpi_device *supplier); bool acpi_dev_ready_for_enumeration(const struct acpi_device *device); diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 0c0695763bea..d4f581c1e21d 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -294,5 +294,13 @@ do { \ #define io_stop_wc() do { } while (0) #endif +/* + * Architectures that guarantee an implicit smp_mb() in switch_mm() + * can override smp_mb__after_switch_mm. + */ +#ifndef smp_mb__after_switch_mm +# define smp_mb__after_switch_mm() smp_mb() +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 6e794420bd39..b7de3a4eade1 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -156,7 +156,10 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...); #else /* !CONFIG_BUG */ #ifndef HAVE_ARCH_BUG -#define BUG() do {} while (1) +#define BUG() do { \ + do {} while (1); \ + unreachable(); \ +} while (0) #endif #ifndef HAVE_ARCH_BUG_ON diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h deleted file mode 100644 index 570cd4da7210..000000000000 --- a/include/asm-generic/export.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef __ASM_GENERIC_EXPORT_H -#define __ASM_GENERIC_EXPORT_H - -/* - * and are deprecated. - * Please include directly. - */ -#include - -#endif diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 87e3d49a4e29..814207e7c37f 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -512,13 +512,9 @@ struct hv_proximity_domain_flags { u32 proximity_info_valid : 1; } __packed; -/* Not a union in windows but useful for zeroing */ -union hv_proximity_domain_info { - struct { - u32 domain_id; - struct hv_proximity_domain_flags flags; - }; - u64 as_uint64; +struct hv_proximity_domain_info { + u32 domain_id; + struct hv_proximity_domain_flags flags; } __packed; struct hv_lp_startup_status { @@ -532,14 +528,13 @@ struct hv_lp_startup_status { } __packed; /* HvAddLogicalProcessor hypercall */ -struct hv_add_logical_processor_in { +struct hv_input_add_logical_processor { u32 lp_index; u32 apic_id; - union hv_proximity_domain_info proximity_domain_info; - u64 flags; + struct hv_proximity_domain_info proximity_domain_info; } __packed; -struct hv_add_logical_processor_out { +struct hv_output_add_logical_processor { struct hv_lp_startup_status startup_status; } __packed; @@ -560,7 +555,7 @@ struct hv_create_vp { u8 padding[3]; u8 subnode_type; u64 subnode_id; - union hv_proximity_domain_info proximity_domain_info; + struct hv_proximity_domain_info proximity_domain_info; u64 flags; } __packed; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 99935779682d..8fe7aaab2599 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,19 @@ extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); +static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node) +{ + struct hv_proximity_domain_info pxm_info = {}; + + if (node != NUMA_NO_NODE) { + pxm_info.domain_id = node_to_pxm(node); + pxm_info.flags.proximity_info_valid = 1; + pxm_info.flags.proximity_preferred = 1; + } + + return pxm_info; +} + /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */ static inline int hv_result(u64 status) { diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index eb4c369a79eb..35d4ca4f6122 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void); */ #define kvm_pmu_update_vcpu_events(vcpu) \ do { \ - if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \ + if (!has_vhe() && kvm_arm_support_pmu_v3()) \ vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c3e8f7cf96be..69e7da33ca49 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -128,6 +128,8 @@ typedef unsigned int __bitwise blk_mode_t; #define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) /* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */ #define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5)) +/* return partition scanning errors */ +#define BLK_OPEN_STRICT_SCAN ((__force blk_mode_t)(1 << 6)) struct gendisk { /* @@ -1505,16 +1507,6 @@ struct blk_holder_ops { * Thaw the file system mounted on the block device. */ int (*thaw)(struct block_device *bdev); - - /* - * If needed, get a reference to the holder. - */ - void (*get_holder)(void *holder); - - /* - * Release the holder. - */ - void (*put_holder)(void *holder); }; /* @@ -1585,6 +1577,7 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev) int bdev_freeze(struct block_device *bdev); int bdev_thaw(struct block_device *bdev); +void bdev_fput(struct file *bdev_file); struct io_comp_batch { struct request *req_list; diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index ca73940e26df..3f4b4ac527ca 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -10,6 +10,7 @@ #ifdef __KERNEL__ #include #include +bool __init cmdline_has_extra_options(void); #else /* !__KERNEL__ */ /* * NOTE: This is only for tools/bootconfig, because tools/bootconfig will @@ -287,7 +288,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos); int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_exit(void); +void __init _xbc_exit(bool early); + +static inline void xbc_exit(void) +{ + _xbc_exit(false); +} /* XBC embedded bootconfig data in kernel */ #ifdef CONFIG_BOOT_CONFIG_EMBED diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4f20f62f9d63..890e152d553e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1574,12 +1574,26 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if underlying BPF program is sleepable we go through tasks trace + * RCU GP and then "classic" RCU GP + */ + void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index cb0d6cd1c12f..60693a145894 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -90,6 +90,14 @@ enum cc_attr { * Examples include TDX Guest. */ CC_ATTR_HOTPLUG_DISABLED, + + /** + * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. + * + * The host kernel is running with the necessary features + * enabled to run SEV-SNP guests. + */ + CC_ATTR_HOST_SEV_SNP, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM @@ -107,10 +115,14 @@ enum cc_attr { * * FALSE - Specified Confidential Computing attribute is not active */ bool cc_platform_has(enum cc_attr attr); +void cc_platform_set(enum cc_attr attr); +void cc_platform_clear(enum cc_attr attr); #else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ static inline bool cc_platform_has(enum cc_attr attr) { return false; } +static inline void cc_platform_set(enum cc_attr attr) { } +static inline void cc_platform_clear(enum cc_attr attr) { } #endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ diff --git a/include/linux/clk.h b/include/linux/clk.h index 00623f4de5e1..0fa56d672532 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -286,6 +286,11 @@ static inline int clk_rate_exclusive_get(struct clk *clk) return 0; } +static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk) +{ + return 0; +} + static inline void clk_rate_exclusive_put(struct clk *clk) {} #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c00cc6c0878a..8c252e073bd8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -268,7 +268,7 @@ static inline void *offset_to_ptr(const int *off) * - When one operand is a null pointer constant (i.e. when x is an integer * constant expression) and the other is an object pointer (i.e. our * third operand), the conditional operator returns the type of the - * object pointer operand (i.e. "int *). Here, within the sizeof(), we + * object pointer operand (i.e. "int *"). Here, within the sizeof(), we * would then get: * sizeof(*((int *)(...)) == sizeof(int) == 4 * - When one operand is a void pointer (i.e. when x is not an integer diff --git a/include/linux/device.h b/include/linux/device.h index c515ba5756e4..07f4d2af1e3c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1236,6 +1236,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); +void device_link_wait_removal(void); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index e06bad467f55..c3f9bb6602ba 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -682,4 +682,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence) return dma_fence_is_array(fence) || dma_fence_is_chain(fence); } +#define DMA_FENCE_WARN(f, fmt, args...) \ + do { \ + struct dma_fence *__ff = (f); \ + pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ + ##args); \ + } while (0) + #endif /* __LINUX_DMA_FENCE_H */ diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 770755df852f..70cd7258cd29 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -245,7 +245,6 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * max utilization to the allowed CPU capacity before calculating * effective performance. */ - max_util = map_util_perf(max_util); max_util = min(max_util, allowed_cpu_cap); /* diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h index 9a9b88962c29..2b85fe9e7f9a 100644 --- a/include/linux/framer/framer.h +++ b/include/linux/framer/framer.h @@ -181,12 +181,12 @@ static inline int framer_notifier_unregister(struct framer *framer, return -ENOSYS; } -struct framer *framer_get(struct device *dev, const char *con_id) +static inline struct framer *framer_get(struct device *dev, const char *con_id) { return ERR_PTR(-ENOSYS); } -void framer_put(struct device *dev, struct framer *framer) +static inline void framer_put(struct device *dev, struct framer *framer) { } diff --git a/include/linux/fs.h b/include/linux/fs.h index 00fc429b0af0..8dfd53b52744 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -121,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) +/* File writes are restricted (block device specific) */ +#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 868c8fb1bbc1..13becafe41df 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -2,6 +2,8 @@ #ifndef __LINUX_GFP_TYPES_H #define __LINUX_GFP_TYPES_H +#include + /* The typedef is in types.h but we want the documentation here */ #if 0 /** diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index dc75f802e284..f8617eaf08ba 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -646,8 +646,6 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, struct gpio_device *gpio_device_find(const void *data, int (*match)(struct gpio_chip *gc, const void *data)); -struct gpio_device *gpio_device_find_by_label(const char *label); -struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); @@ -814,6 +812,9 @@ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); int gpio_device_get_base(struct gpio_device *gdev); const char *gpio_device_get_label(struct gpio_device *gdev); +struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); + #else /* CONFIG_GPIOLIB */ #include @@ -843,6 +844,18 @@ static inline const char *gpio_device_get_label(struct gpio_device *gdev) return NULL; } +static inline struct gpio_device *gpio_device_find_by_label(const char *label) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h index 6c75c8bd44a0..1a14e239221f 100644 --- a/include/linux/gpio/property.h +++ b/include/linux/gpio/property.h @@ -2,7 +2,6 @@ #ifndef __LINUX_GPIO_PROPERTY_H #define __LINUX_GPIO_PROPERTY_H -#include /* for GPIO_* flags */ #include #define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6ef0557b4bff..96ceb4095425 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -832,6 +832,7 @@ struct vmbus_gpadl { u32 gpadl_handle; u32 size; void *buffer; + bool decrypted; }; struct vmbus_channel { diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 76121c2bb4f8..5c9bdd3ffccc 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -67,6 +67,8 @@ * later. * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers, * depends on IRQF_PERCPU. + * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared + * interrupt. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -82,6 +84,7 @@ #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 +#define IRQF_COND_ONESHOT 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e24893625085..ac333ea81d31 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -13,7 +13,7 @@ enum { * A hint to not wake right away but delay until there are enough of * tw's queued to match the number of CQEs the task is waiting for. * - * Must not be used wirh requests generating more than one CQE. + * Must not be used with requests generating more than one CQE. * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set. */ IOU_F_TWQ_LAZY_WAKE = 1, @@ -294,7 +294,6 @@ struct io_ring_ctx { struct io_submit_state submit_state; - struct io_buffer_list *io_bl; struct xarray io_bl_xa; struct io_hash_table cancel_table_locked; diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 147feebd508c..3f003d5fde53 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -114,7 +114,7 @@ do { \ # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false -# define lockdep_hrtimer_exit(__context) do { } while (0) +# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) diff --git a/include/linux/libata.h b/include/linux/libata.h index 26d68115afb8..324d792e7c78 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -107,6 +107,7 @@ enum { ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 0436b919f1c7..7b0ee64225de 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2207,11 +2207,6 @@ static inline int arch_make_folio_accessible(struct folio *folio) */ #include -static __always_inline void *lowmem_page_address(const struct page *page) -{ - return page_to_virt(page); -} - #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) #define HASHED_PAGE_VIRTUAL #endif @@ -2234,6 +2229,11 @@ void set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif +static __always_inline void *lowmem_page_address(const struct page *page) +{ + return page_to_virt(page); +} + #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) #define page_address(page) lowmem_page_address(page) #define set_page_address(page, address) do { } while(0) diff --git a/include/linux/mman.h b/include/linux/mman.h index dc7048824be8..bcb201ab7a41 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -162,6 +162,14 @@ calc_vm_flag_bits(unsigned long flags) unsigned long vm_commit_limit(void); +#ifndef arch_memory_deny_write_exec_supported +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return true; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported +#endif + /* * Denies creating a writable executable mapping or gaining executable permissions. * diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 3921fbed0b28..51421fdbb0ba 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -17,10 +17,12 @@ * build_OID_registry.pl to generate the data for look_up_OID(). */ enum OID { + OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */ OID_id_dsa, /* 1.2.840.10040.4.1 */ OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */ OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */ OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */ + OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */ OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */ OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */ @@ -28,6 +30,7 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ + OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ OID_sha512WithRSAEncryption, /* 1.2.840.113549.1.1.13 */ @@ -64,6 +67,7 @@ enum OID { OID_PKU2U, /* 1.3.5.1.5.2.7 */ OID_Scram, /* 1.3.6.1.5.5.14 */ OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ + OID_sha1, /* 1.3.14.3.2.26 */ OID_id_ansip384r1, /* 1.3.132.0.34 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index fcc06c300a72..5d3a0cccc6bf 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -11,8 +11,8 @@ #include -/* 15 pointers + header align the folio_batch structure to a power of two */ -#define PAGEVEC_SIZE 15 +/* 31 pointers + header align the folio_batch structure to a power of two */ +#define PAGEVEC_SIZE 31 struct folio; diff --git a/include/linux/peci.h b/include/linux/peci.h index 9b3d36aff431..90e241458ef6 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -58,7 +58,6 @@ static inline struct peci_controller *to_peci_controller(void *d) /** * struct peci_device - PECI device * @dev: device object to register PECI device to the device model - * @controller: manages the bus segment hosting this PECI device * @info: PECI device characteristics * @info.family: device family * @info.model: device model diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 5d868505a94e..6d92b68efbf6 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - offset ^= (rand); \ + offset = ror32(offset, 5) ^ (rand); \ raw_cpu_write(kstack_offset, offset); \ } \ } while (0) diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h index 29c4e4f243e4..f2394a409c9d 100644 --- a/include/linux/rwbase_rt.h +++ b/include/linux/rwbase_rt.h @@ -31,9 +31,9 @@ static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb) return atomic_read(&rwb->readers) != READER_BIAS; } -static inline void rw_base_assert_held_write(const struct rwbase_rt *rwb) +static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb) { - WARN_ON(atomic_read(&rwb->readers) != WRITER_BIAS); + return atomic_read(&rwb->readers) == WRITER_BIAS; } static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 4f1c18992f76..c8b543d428b0 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -167,14 +167,14 @@ static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem) return rw_base_is_locked(&sem->rwbase); } -static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) +static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) { WARN_ON(!rwsem_is_locked(sem)); } -static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) +static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) { - rw_base_assert_held_write(sem); + WARN_ON(!rw_base_is_write_locked(&sem->rwbase)); } static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 35f3a4a8ceb1..acf7e1a3f3de 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio) /* * Using folio_mapping() is quite slow because of the actual call * instruction. - * We know that secretmem pages are not compound and LRU so we can + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. */ - if (folio_test_large(folio) || !folio_test_lru(folio)) + if (folio_test_large(folio)) return false; mapping = (struct address_space *) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a4c15db2f5e5..3fb18f7eb73e 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, struct mm_struct *mm, unsigned long vm_flags); +#else +static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, + struct mm_struct *mm, unsigned long vm_flags) +{ + return false; +} +#endif + #ifdef CONFIG_SHMEM extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); #else diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0c7c67b3a87b..9d24aec064e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -753,8 +753,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -875,10 +873,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 307961b41541..317200cd3a60 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, return 0; } +/* Deprecated. + * This is unsafe, unless caller checked user provided optlen. + * Prefer copy_safe_from_sockptr() instead. + */ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) { return copy_from_sockptr_offset(dst, src, 0, size); } +/** + * copy_safe_from_sockptr: copy a struct from sockptr + * @dst: Destination address, in kernel space. This buffer must be @ksize + * bytes long. + * @ksize: Size of @dst struct. + * @optval: Source address. (in user or kernel space) + * @optlen: Size of @optval data. + * + * Returns: + * * -EINVAL: @optlen < @ksize + * * -EFAULT: access to userspace failed. + * * 0 : @ksize bytes were copied + */ +static inline int copy_safe_from_sockptr(void *dst, size_t ksize, + sockptr_t optval, unsigned int optlen) +{ + if (optlen < ksize) + return -EINVAL; + return copy_from_sockptr(dst, optval, ksize); +} + static inline int copy_struct_from_sockptr(void *dst, size_t ksize, sockptr_t src, size_t usize) { diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 3c6caa5abc7c..e9ec32fb97d4 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -44,10 +44,9 @@ typedef u32 depot_stack_handle_t; union handle_parts { depot_stack_handle_t handle; struct { - /* pool_index is offset by 1 */ - u32 pool_index : DEPOT_POOL_INDEX_BITS; - u32 offset : DEPOT_OFFSET_BITS; - u32 extra : STACK_DEPOT_EXTRA_BITS; + u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; }; }; diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 48b700ba1d18..a5c560a2f8c2 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) } #endif /* CONFIG_MIGRATION */ +#ifdef CONFIG_MEMORY_FAILURE + +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} + +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) @@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry) /* * A pfn swap entry is a special type of swap entry that always has a pfn stored - * in the swap offset. They are used to represent unaddressable device memory - * and to restrict access to a page undergoing migration. + * in the swap offset. They can either be used to represent unaddressable device + * memory, to restrict access to a page undergoing migration or to represent a + * pfn which has been hwpoisoned and unmapped. */ static inline bool is_pfn_swap_entry(swp_entry_t entry) { @@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry) BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); return is_migration_entry(entry) || is_device_private_entry(entry) || - is_device_exclusive_entry(entry); + is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); } struct page_vma_mapped_walk; @@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd) } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -#ifdef CONFIG_MEMORY_FAILURE - -/* - * Support for hardware poisoned pages - */ -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - BUG_ON(!PageLocked(page)); - return swp_entry(SWP_HWPOISON, page_to_pfn(page)); -} - -static inline int is_hwpoison_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_HWPOISON; -} - -#else - -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - return swp_entry(0, 0); -} - -static inline int is_hwpoison_entry(swp_entry_t swp) -{ - return 0; -} -#endif - static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index c6540ceea143..0982d1d52b24 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -22,7 +22,7 @@ * * @read: returns the current cycle value * @mask: bitmask for two's complement - * subtraction of non 64 bit counters, + * subtraction of non-64-bit counters, * see CYCLECOUNTER_MASK() helper macro * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) @@ -35,7 +35,7 @@ struct cyclecounter { }; /** - * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds + * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds * Contains the state needed by timecounter_read() to detect * cycle counter wrap around. Initialize with * timecounter_init(). Also used to convert cycle counts into the @@ -66,6 +66,8 @@ struct timecounter { * @cycles: Cycles * @mask: bit mask for maintaining the 'frac' field * @frac: pointer to storage for the fractional nanoseconds. + * + * Returns: cycle counter cycles converted to nanoseconds */ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, u64 cycles, u64 mask, u64 *frac) @@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, /** * timecounter_adjtime - Shifts the time of the clock. + * @tc: The &struct timecounter to adjust * @delta: Desired change in nanoseconds. */ static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) @@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc, * * In other words, keeps track of time since the same epoch as * the function which generated the initial time stamp. + * + * Returns: nanoseconds since the initial time stamp */ extern u64 timecounter_read(struct timecounter *tc); @@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc); * * This allows conversion of cycle counter values which were generated * in the past. + * + * Returns: cycle counter converted to nanoseconds since the initial time stamp */ extern u64 timecounter_cyc2time(const struct timecounter *tc, u64 cycle_tstamp); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7e50cbd97f86..0ea7823b7f31 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* - * ktime_get() family: read the current time in a multitude of ways, + * ktime_get() family - read the current time in a multitude of ways. * * The default time reference is CLOCK_MONOTONIC, starting at * boot time but not counting the time spent in suspend. * For other references, use the functions with "real", "clocktai", * "boottime" and "raw" suffixes. * - * To get the time in a different format, use the ones wit + * To get the time in a different format, use the ones with * "ns", "ts64" and "seconds" suffix. * * See Documentation/core-api/timekeeping.rst for more details. @@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format + * + * Returns: real (wall) time in ktime_t format */ static inline ktime_t ktime_get_real(void) { @@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void) } /** - * ktime_get_boottime - Returns monotonic time since boot in ktime_t format + * ktime_get_boottime - Get monotonic time since boot in ktime_t format * * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the * time spent in suspend. + * + * Returns: monotonic time since boot in ktime_t format */ static inline ktime_t ktime_get_boottime(void) { @@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void) } /** - * ktime_get_clocktai - Returns the TAI time of day in ktime_t format + * ktime_get_clocktai - Get the TAI time of day in ktime_t format + * + * Returns: the TAI time of day in ktime_t format */ static inline ktime_t ktime_get_clocktai(void) { @@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void) /** * ktime_mono_to_real - Convert monotonic time to clock realtime + * @mono: monotonic time to convert + * + * Returns: time converted to realtime clock */ static inline ktime_t ktime_mono_to_real(ktime_t mono) { return ktime_mono_to_any(mono, TK_OFFS_REAL); } +/** + * ktime_get_ns - Get the current time in nanoseconds + * + * Returns: current time converted to nanoseconds + */ static inline u64 ktime_get_ns(void) { return ktime_to_ns(ktime_get()); } +/** + * ktime_get_real_ns - Get the current real/wall time in nanoseconds + * + * Returns: current real time converted to nanoseconds + */ static inline u64 ktime_get_real_ns(void) { return ktime_to_ns(ktime_get_real()); } +/** + * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds + * + * Returns: current boottime converted to nanoseconds + */ static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } +/** + * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds + * + * Returns: current TAI time converted to nanoseconds + */ static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } +/** + * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds + * + * Returns: current raw monotonic time converted to nanoseconds + */ static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); @@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* - * struct ktime_timestanps - Simultaneous mono/boot/real timestamps +/** + * struct ktime_timestamps - Simultaneous mono/boot/real timestamps * @mono: Monotonic timestamp * @boot: Boottime timestamp * @real: Realtime timestamp @@ -242,7 +276,8 @@ struct ktime_timestamps { * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time - * @clock_was_set_seq: The sequence number of clock was set events + * @cs_id: Clocksource ID + * @clock_was_set_seq: The sequence number of clock-was-set events * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { diff --git a/include/linux/timer.h b/include/linux/timer.h index 14a633ba61d6..e67ecd1cbc97 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -22,7 +22,7 @@ #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif -/** +/* * @TIMER_DEFERRABLE: A deferrable timer will work normally when the * system is busy, but will not cause a CPU to come out of idle just * to service it; instead, the timer will be serviced when the CPU @@ -140,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { } * or not. Callers must ensure serialization wrt. other operations done * to this timer, eg. interrupt contexts, or other CPUs on SMP. * - * return value: 1 if the timer is pending, 0 if not. + * Returns: 1 if the timer is pending, 0 if not. */ static inline int timer_pending(const struct timer_list * timer) { @@ -175,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer); * See timer_delete_sync() for detailed explanation. * * Do not use in new code. Use timer_delete_sync() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer_sync(struct timer_list *timer) { @@ -188,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer) * See timer_delete() for detailed explanation. * * Do not use in new code. Use timer_delete() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer(struct timer_list *timer) { diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index ffe48e69b3f3..457879938fc1 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p) p->v++; } -static inline void u64_stats_init(struct u64_stats_sync *syncp) -{ - seqcount_init(&syncp->seq); -} +#define u64_stats_init(syncp) \ + do { \ + struct u64_stats_sync *__s = (syncp); \ + seqcount_init(&__s->seq); \ + } while (0) static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { diff --git a/include/linux/udp.h b/include/linux/udp.h index 3748e82b627b..e398e1dbd2d3 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -108,7 +108,7 @@ struct udp_sock { #define udp_assign_bit(nr, sk, val) \ assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) -#define UDP_MAX_SEGMENTS (1 << 6UL) +#define UDP_MAX_SEGMENTS (1 << 7UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) @@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. + */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b0201747a263..26c4325aa373 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -170,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); /** * struct virtio_driver - operations for a virtio I/O driver - * @driver: underlying device driver (populate name and owner). + * @driver: underlying device driver (populate name). * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this driver. * @feature_table_size: number of entries in the feature table array. @@ -208,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) return container_of(drv, struct virtio_driver, driver); } -int register_virtio_driver(struct virtio_driver *drv); +/* use a macro to avoid include chaining to get THIS_MODULE */ +#define register_virtio_driver(drv) \ + __register_virtio_driver(drv, THIS_MODULE) +int __register_virtio_driver(struct virtio_driver *drv, struct module *owner); void unregister_virtio_driver(struct virtio_driver *drv); /* module_virtio_driver() - Helper macro for drivers that don't do diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9d06eb945509..62a407db1bf5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -438,6 +438,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) refcount_inc(&ifp->refcnt); } +static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp) +{ + return refcount_inc_not_zero(&ifp->refcnt); +} /* * compute link-local solicited-node multicast address diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 9fe95a22abeb..eaec5d6caa29 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -585,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, return skb; } +static inline int bt_copy_from_sockptr(void *dst, size_t dst_size, + sockptr_t src, size_t src_size) +{ + if (dst_size > src_size) + return -EINVAL; + + return copy_from_sockptr(dst, src, dst_size); +} + int bt_to_errno(u16 code); __u8 bt_status(int err); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8701ca5f31ee..5c12761cbc0e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -176,6 +176,15 @@ enum { */ HCI_QUIRK_USE_BDADDR_PROPERTY, + /* When this quirk is set, the Bluetooth Device Address provided by + * the 'local-bd-address' fwnode property is incorrectly specified in + * big-endian order. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BDADDR_PROPERTY_BROKEN, + /* When this quirk is set, the duplicate filtering during * scanning is based on Bluetooth devices addresses. To allow * RSSI based updates, restart scanning if needed. diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2e2be4fd2bb6..1e09329acc42 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4991,6 +4991,7 @@ struct cfg80211_ops { * set this flag to update channels on beacon hints. * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link * of an NSTR mobile AP MLD. + * @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -5002,6 +5003,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), + WIPHY_FLAG_DISABLE_WEXT = BIT(9), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11), WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12), diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 9ab4bf704e86..ccf171f7eb60 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -175,6 +175,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); +void inet_csk_clear_xmit_timers_sync(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 5cd64bb2104d..c286cc2e766e 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -361,6 +361,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) return pskb_network_may_pull(skb, nhlen); } +/* Variant of pskb_inet_may_pull(). + */ +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +{ + int nhlen = 0, maclen = ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) + * And we get MAC len. + */ + if (eth_type_vlan(type)) + type = __vlan_get_protocol(skb, type, &maclen); + + switch (type) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + } + /* For ETH_P_IPV6/ETH_P_IP we make sure to pull + * a base network header in skb->head. + */ + if (!pskb_may_pull(skb, maclen + nhlen)) + return false; + + skb_set_network_header(skb, maclen); + return true; +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 76147feb0d10..4eeedf14711b 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -39,7 +39,6 @@ enum TRI_STATE { #define COMP_ENTRY_SIZE 64 #define RX_BUFFERS_PER_QUEUE 512 -#define MANA_RX_DATA_ALIGN 64 #define MAX_SEND_BUFFERS_PER_QUEUE 256 diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index a763dd327c6e..9abb7ee40d72 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); -static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) { __be16 proto; @@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) return 0; } +static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) +{ + if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + return false; + + *inner_proto = __nf_flow_pppoe_proto(skb); + + return true; +} + #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e27c28b612e4..3f1ed467f951 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) return (void *)priv; } + +/** + * enum nft_iter_type - nftables set iterator type + * + * @NFT_ITER_READ: read-only iteration over set elements + * @NFT_ITER_UPDATE: iteration under mutex to update set element state + */ +enum nft_iter_type { + NFT_ITER_UNSPEC, + NFT_ITER_READ, + NFT_ITER_UPDATE, +}; + struct nft_set; struct nft_set_iter { u8 genmask; + enum nft_iter_type type:8; unsigned int count; unsigned int skip; int err; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index cefe0c4bdae3..41ca14e81d55 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -117,6 +117,7 @@ struct Qdisc { struct qdisc_skb_head q; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; + int owner; unsigned long state; unsigned long state2; /* must be written under qdisc spinlock */ struct Qdisc *next_sched; diff --git a/include/net/sock.h b/include/net/sock.h index b5e00702acc1..f57bfd8a2ad2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1759,6 +1759,13 @@ static inline void sock_owned_by_me(const struct sock *sk) #endif } +static inline void sock_not_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); +#endif +} + static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3cb4dc9bd70e..3d54de168a6d 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -188,6 +188,8 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, { if (!compl) return; + if (!compl->tx_timestamp) + return; *compl->tx_timestamp = ops->tmo_fill_timestamp(priv); } diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 4ce1988b2ba0..f40915d2ecee 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -12,6 +12,7 @@ struct request; struct scsi_driver { struct device_driver gendrv; + int (*resume)(struct device *); void (*rescan)(struct device *); blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b259d42a1e1a..129001f600fc 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht); #define scsi_template_proc_dir(sht) NULL #endif extern void scsi_scan_host(struct Scsi_Host *); +extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index a8bebac1e4b2..957295364a5e 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -56,6 +56,9 @@ struct hdac_ext_stream { u32 pphcldpl; u32 pphcldpu; + u32 pplcllpl; + u32 pplcllpu; + bool decoupled:1; bool link_locked:1; bool link_prepared; diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 53470d6a28d6..24dbe16684ae 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -143,6 +143,9 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, u32 bus_id, u8 link_type, u8 vbps, u8 bps, u8 num_ch, u32 rate, u8 dir, u8 dev_type); +int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt, + u8 virtual_bus_id); + #else static inline struct nhlt_acpi_table *intel_nhlt_init(struct device *dev) @@ -184,6 +187,13 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, return NULL; } +static inline int intel_nhlt_ssp_device_type(struct device *dev, + struct nhlt_acpi_table *nhlt, + u8 virtual_bus_id) +{ + return -EINVAL; +} + #endif #endif diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index 4038dd421150..1dc59005d241 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -15,7 +15,7 @@ #ifndef __TAS2781_TLV_H__ #define __TAS2781_TLV_H__ -static const DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); #endif diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index ba2d96a1bc2f..f50fcafc69de 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -609,7 +609,7 @@ TRACE_EVENT(rpcgss_context, __field(unsigned int, timeout) __field(u32, window_size) __field(int, len) - __string(acceptor, data) + __string_len(acceptor, data, len) ), TP_fast_assign( @@ -618,7 +618,7 @@ TRACE_EVENT(rpcgss_context, __entry->timeout = timeout; __entry->window_size = window_size; __entry->len = len; - strncpy(__get_str(acceptor), data, len); + __assign_str(acceptor, data); ), TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s", diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 9ce46edc62a5..2040a470ddb4 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -913,14 +913,25 @@ enum kfd_dbg_trap_exception_code { KFD_EC_MASK(EC_DEVICE_NEW)) #define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \ KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE)) +#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED)) /* Checks for exception code types for KFD search */ +#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX) #define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) #define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) #define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) +#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \ + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET)) /* Runtime enable states */ diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bea697390613..b95dd84eef2d 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -179,12 +179,6 @@ /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) -/* Get the count of all virtqueues */ -#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) - -/* Get the number of virtqueue groups. */ -#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) - /* Get the number of address spaces. */ #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) @@ -228,10 +222,17 @@ #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ struct vhost_vring_state) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + /* Get the queue size of a specific virtqueue. * userspace set the vring index in vhost_vring_state.index * kernel set the queue size in vhost_vring_state.num */ -#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ +#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) #endif diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h index c72ce387286a..30a5c1a59376 100644 --- a/include/uapi/scsi/scsi_bsg_mpi3mr.h +++ b/include/uapi/scsi/scsi_bsg_mpi3mr.h @@ -382,7 +382,7 @@ struct mpi3mr_bsg_in_reply_buf { __u8 mpi_reply_type; __u8 rsvd1; __u16 rsvd2; - __u8 reply_buf[1]; + __u8 reply_buf[]; }; /** diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index cb2afcebbdf5..a35e12f8e68b 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -328,6 +328,7 @@ struct ufs_pwr_mode_info { * @op_runtime_config: called to config Operation and runtime regs Pointers * @get_outstanding_cqs: called to get outstanding completion queues * @config_esi: called to config Event Specific Interrupt + * @config_scsi_dev: called to configure SCSI device parameters */ struct ufs_hba_variant_ops { const char *name; diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 5d5c0b8efff2..c71ddb6d4691 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -19,12 +19,6 @@ #include #include -#ifdef CONFIG_ARM64 -#include -#else -#include -#endif - #ifdef CONFIG_ARCH_HAS_VDSO_DATA #include #else @@ -132,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden */ union vdso_data_store { struct vdso_data data[CS_BASES]; - u8 page[PAGE_SIZE]; + u8 page[1U << CONFIG_PAGE_SHIFT]; }; /* diff --git a/init/initramfs.c b/init/initramfs.c index 5193fae330c3..814241b64827 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -367,7 +367,7 @@ static int __init do_name(void) if (S_ISREG(mode)) { int ml = maybe_link(); if (ml >= 0) { - int openflags = O_WRONLY|O_CREAT; + int openflags = O_WRONLY|O_CREAT|O_LARGEFILE; if (ml != 1) openflags |= O_TRUNC; wfile = filp_open(collected, openflags, mode); @@ -674,7 +674,7 @@ static void __init populate_initrd_image(char *err) printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700); if (IS_ERR(file)) return; diff --git a/init/main.c b/init/main.c index 2ca52474d0c3..5dcf5274c09c 100644 --- a/init/main.c +++ b/init/main.c @@ -487,6 +487,11 @@ static int __init warn_bootconfig(char *str) early_param("bootconfig", warn_bootconfig); +bool __init cmdline_has_extra_options(void) +{ + return extra_command_line || extra_init_args; +} + /* Change NUL term back to "=", to make "param" the whole string. */ static void __init repair_env_string(char *param, char *val) { @@ -631,6 +636,8 @@ static void __init setup_command_line(char *command_line) if (!saved_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen); + len = xlen + strlen(command_line) + 1; + static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); if (!static_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5d4b448fdc50..c170a2b8d2cf 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -147,6 +147,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, static void io_queue_sqe(struct io_kiocb *req); struct kmem_cache *req_cachep; +static struct workqueue_struct *iou_wq __ro_after_init; static int __read_mostly sysctl_io_uring_disabled; static int __read_mostly sysctl_io_uring_group = -1; @@ -350,7 +351,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) err: kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); - kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); return NULL; @@ -1982,10 +1982,15 @@ fail: err = -EBADFD; if (!io_file_can_poll(req)) goto fail; - err = -ECANCELED; - if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK) - goto fail; - return; + if (req->file->f_flags & O_NONBLOCK || + req->file->f_mode & FMODE_NOWAIT) { + err = -ECANCELED; + if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK) + goto fail; + return; + } else { + req->flags &= ~REQ_F_APOLL_MULTISHOT; + } } if (req->flags & REQ_F_FORCE_ASYNC) { @@ -2597,19 +2602,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (__io_cqring_events_user(ctx) >= min_events) return 0; - if (sig) { -#ifdef CONFIG_COMPAT - if (in_compat_syscall()) - ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, - sigsz); - else -#endif - ret = set_user_sigmask(sig, sigsz); - - if (ret) - return ret; - } - init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; INIT_LIST_HEAD(&iowq.wq.entry); @@ -2628,6 +2620,19 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, io_napi_adjust_timeout(ctx, &iowq, &ts); } + if (sig) { +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, + sigsz); + else +#endif + ret = set_user_sigmask(sig, sigsz); + + if (ret) + return ret; + } + io_napi_busy_loop(ctx, &iowq); trace_io_uring_cqring_wait(ctx, min_events); @@ -2926,7 +2931,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_napi_free(ctx); kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); - kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); } @@ -3161,7 +3165,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) * noise and overhead, there's no discernable change in runtime * over using system_wq. */ - queue_work(system_unbound_wq, &ctx->exit_work); + queue_work(iou_wq, &ctx->exit_work); } static int io_uring_release(struct inode *inode, struct file *file) @@ -3443,14 +3447,15 @@ static void *io_uring_validate_mmap_request(struct file *file, ptr = ctx->sq_sqes; break; case IORING_OFF_PBUF_RING: { + struct io_buffer_list *bl; unsigned int bgid; bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; - rcu_read_lock(); - ptr = io_pbuf_get_address(ctx, bgid); - rcu_read_unlock(); - if (!ptr) - return ERR_PTR(-EINVAL); + bl = io_pbuf_get_bl(ctx, bgid); + if (IS_ERR(bl)) + return bl; + ptr = bl->buf_ring; + io_put_bl(ctx, bl); break; } default: @@ -4185,6 +4190,8 @@ static int __init io_uring_init(void) io_buf_cachep = KMEM_CACHE(io_buffer, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); + iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64); + #ifdef CONFIG_SYSCTL register_sysctl_init("kernel", kernel_io_uring_disabled_table); #endif diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 693c26da4ee1..3aa16e27f509 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -17,8 +17,6 @@ #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) -#define BGID_ARRAY 64 - /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) @@ -40,13 +38,9 @@ struct io_buf_free { int inuse; }; -static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, - struct io_buffer_list *bl, - unsigned int bgid) +static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, + unsigned int bgid) { - if (bl && bgid < BGID_ARRAY) - return &bl[bgid]; - return xa_load(&ctx->io_bl_xa, bgid); } @@ -55,7 +49,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, { lockdep_assert_held(&ctx->uring_lock); - return __io_buffer_get_list(ctx, ctx->io_bl, bgid); + return __io_buffer_get_list(ctx, bgid); } static int io_buffer_add_list(struct io_ring_ctx *ctx, @@ -67,11 +61,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, * always under the ->uring_lock, but the RCU lookup from mmap does. */ bl->bgid = bgid; - smp_store_release(&bl->is_ready, 1); - - if (bgid < BGID_ARRAY) - return 0; - + atomic_set(&bl->refs, 1); return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } @@ -208,24 +198,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, return ret; } -static __cold int io_init_bl_list(struct io_ring_ctx *ctx) -{ - struct io_buffer_list *bl; - int i; - - bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); - if (!bl) - return -ENOMEM; - - for (i = 0; i < BGID_ARRAY; i++) { - INIT_LIST_HEAD(&bl[i].buf_list); - bl[i].bgid = i; - } - - smp_store_release(&ctx->io_bl, bl); - return 0; -} - /* * Mark the given mapped range as free for reuse */ @@ -294,24 +266,24 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, return i; } +void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) +{ + if (atomic_dec_and_test(&bl->refs)) { + __io_remove_buffers(ctx, bl, -1U); + kfree_rcu(bl, rcu); + } +} + void io_destroy_buffers(struct io_ring_ctx *ctx) { struct io_buffer_list *bl; struct list_head *item, *tmp; struct io_buffer *buf; unsigned long index; - int i; - - for (i = 0; i < BGID_ARRAY; i++) { - if (!ctx->io_bl) - break; - __io_remove_buffers(ctx, &ctx->io_bl[i], -1U); - } xa_for_each(&ctx->io_bl_xa, index, bl) { xa_erase(&ctx->io_bl_xa, bl->bgid); - __io_remove_buffers(ctx, bl, -1U); - kfree_rcu(bl, rcu); + io_put_bl(ctx, bl); } /* @@ -489,12 +461,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) io_ring_submit_lock(ctx, issue_flags); - if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) { - ret = io_init_bl_list(ctx); - if (ret) - goto err; - } - bl = io_buffer_get_list(ctx, p->bgid); if (unlikely(!bl)) { bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); @@ -507,14 +473,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) if (ret) { /* * Doesn't need rcu free as it was never visible, but - * let's keep it consistent throughout. Also can't - * be a lower indexed array group, as adding one - * where lookup failed cannot happen. + * let's keep it consistent throughout. */ - if (p->bgid >= BGID_ARRAY) - kfree_rcu(bl, rcu); - else - WARN_ON_ONCE(1); + kfree_rcu(bl, rcu); goto err; } } @@ -679,12 +640,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (reg.ring_entries >= 65536) return -EINVAL; - if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { - int ret = io_init_bl_list(ctx); - if (ret) - return ret; - } - bl = io_buffer_get_list(ctx, reg.bgid); if (bl) { /* if mapped buffer ring OR classic exists, don't allow */ @@ -733,11 +688,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (!bl->is_buf_ring) return -EINVAL; - __io_remove_buffers(ctx, bl, -1U); - if (bl->bgid >= BGID_ARRAY) { - xa_erase(&ctx->io_bl_xa, bl->bgid); - kfree_rcu(bl, rcu); - } + xa_erase(&ctx->io_bl_xa, bl->bgid); + io_put_bl(ctx, bl); return 0; } @@ -767,23 +719,35 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg) return 0; } -void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) +struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, + unsigned long bgid) { struct io_buffer_list *bl; + bool ret; - bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); - - if (!bl || !bl->is_mmap) - return NULL; /* - * Ensure the list is fully setup. Only strictly needed for RCU lookup - * via mmap, and in that case only for the array indexed groups. For - * the xarray lookups, it's either visible and ready, or not at all. + * We have to be a bit careful here - we're inside mmap and cannot grab + * the uring_lock. This means the buffer_list could be simultaneously + * going away, if someone is trying to be sneaky. Look it up under rcu + * so we know it's not going away, and attempt to grab a reference to + * it. If the ref is already zero, then fail the mapping. If successful, + * the caller will call io_put_bl() to drop the the reference at at the + * end. This may then safely free the buffer_list (and drop the pages) + * at that point, vm_insert_pages() would've already grabbed the + * necessary vma references. */ - if (!smp_load_acquire(&bl->is_ready)) - return NULL; + rcu_read_lock(); + bl = xa_load(&ctx->io_bl_xa, bgid); + /* must be a mmap'able buffer ring and have pages */ + ret = false; + if (bl && bl->is_mmap) + ret = atomic_inc_not_zero(&bl->refs); + rcu_read_unlock(); - return bl->buf_ring; + if (ret) + return bl; + + return ERR_PTR(-EINVAL); } /* diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 1c7b654ee726..df365b8860cf 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -25,12 +25,12 @@ struct io_buffer_list { __u16 head; __u16 mask; + atomic_t refs; + /* ring mapped provided buffers */ __u8 is_buf_ring; /* ring mapped provided buffers, but mmap'ed by application */ __u8 is_mmap; - /* bl is visible from an RCU point of view for lookup */ - __u8 is_ready; }; struct io_buffer { @@ -61,7 +61,9 @@ void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); -void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); +void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); +struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, + unsigned long bgid); static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) { diff --git a/io_uring/net.c b/io_uring/net.c index 1e7665ff6ef7..4afb475d4197 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1276,6 +1276,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) if (req_has_async_data(req)) { kmsg = req->async_data; + kmsg->msg.msg_control_user = sr->msg_control; } else { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) diff --git a/io_uring/rw.c b/io_uring/rw.c index 0585ebcc9773..c8d48287439e 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -936,6 +936,13 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) ret = __io_read(req, issue_flags); + /* + * If the file doesn't support proper NOWAIT, then disable multishot + * and stay in single shot mode. + */ + if (!io_file_supports_nowait(req)) + req->flags &= ~REQ_F_APOLL_MULTISHOT; + /* * If we get -EAGAIN, recycle our buffer and just let normal poll * handling arm it. @@ -955,7 +962,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) /* * Any successful return value will keep the multishot read armed. */ - if (ret > 0) { + if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) { /* * Put our buffer and post a CQE. If we fail to post a CQE, then * jump to the termination path. This request is then done. diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 368c5d86b5b7..e497011261b8 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -4,7 +4,7 @@ ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) # ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif -CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) +CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 86571e760dd6..343c3456c8dd 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -38,7 +38,7 @@ /* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */ #define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8) -#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ) +#define KERN_VM_SZ (SZ_4G + GUARD_SZ) struct bpf_arena { struct bpf_map map; @@ -110,7 +110,7 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr) return ERR_PTR(-EINVAL); vm_range = (u64)attr->max_entries * PAGE_SIZE; - if (vm_range > (1ull << 32)) + if (vm_range > SZ_4G) return ERR_PTR(-E2BIG); if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32)) @@ -301,7 +301,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad if (pgoff) return -EINVAL; - if (len > (1ull << 32)) + if (len > SZ_4G) return -E2BIG; /* if user_vm_start was specified at arena creation time */ @@ -322,7 +322,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad if (WARN_ON_ONCE(arena->user_vm_start)) /* checks at map creation time should prevent this */ return -EFAULT; - return round_up(ret, 1ull << 32); + return round_up(ret, SZ_4G); } static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) @@ -346,7 +346,7 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) return -EBUSY; /* Earlier checks should prevent this */ - if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff)) + if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff)) return -EFAULT; if (remember_vma(arena, vma)) @@ -420,7 +420,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt if (uaddr & ~PAGE_MASK) return 0; pgoff = compute_pgoff(arena, uaddr); - if (pgoff + page_cnt > page_cnt_max) + if (pgoff > page_cnt_max - page_cnt) /* requested address will be outside of user VMA */ return 0; } @@ -447,7 +447,13 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt goto out; uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE); - /* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */ + /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1 + * will not overflow 32-bit. Lower 32-bit need to represent + * contiguous user address range. + * Map these pages at kern_vm_start base. + * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow + * lower 32-bit and it's ok. + */ ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32, kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages); if (ret) { @@ -510,6 +516,11 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt) if (!page) continue; if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */ + /* Optimization for the common case of page_cnt==1: + * If page wasn't mapped into some user vma there + * is no need to call zap_pages which is slow. When + * page_cnt is big it's faster to do the batched zap. + */ zap_pages(arena, full_uaddr, 1); vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE); __free_page(page); diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index addf3dd57b59..35e1ddca74d2 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -80,6 +80,18 @@ static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key return -EOPNOTSUPP; } +/* Called from syscall */ +static int bloom_map_alloc_check(union bpf_attr *attr) +{ + if (attr->value_size > KMALLOC_MAX_SIZE) + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ + return -E2BIG; + + return 0; +} + static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; @@ -191,6 +203,7 @@ static u64 bloom_map_mem_usage(const struct bpf_map *map) BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bloom_map_alloc_check, .map_alloc = bloom_map_alloc, .map_free = bloom_map_free, .map_get_next_key = bloom_map_get_next_key, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a89587859571..449b9a5d3fe3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2548,7 +2548,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ae2ff73bde7e..c287925471f6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link) atomic64_inc(&link->refcnt); } +static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) +{ + struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); + + /* free bpf_link and its containing memory */ + link->ops->dealloc_deferred(link); +} + +static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_link_defer_dealloc_rcu_gp(rcu); + else + call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); +} + /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + bool sleepable = false; + bpf_link_free_id(link->id); if (link->prog) { + sleepable = link->prog->sleepable; /* detach BPF program, clean up used resources */ link->ops->release(link); bpf_prog_put(link->prog); } - /* free bpf_link and its containing memory */ - link->ops->dealloc(link); + if (link->ops->dealloc_deferred) { + /* schedule BPF link deallocation; if underlying BPF program + * is sleepable, we need to first wait for RCU tasks trace + * sync, then go through "classic" RCU grace period + */ + if (sleepable) + call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + else + call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); + } + if (link->ops->dealloc) + link->ops->dealloc(link); } static void bpf_link_put_deferred(struct work_struct *work) @@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_raw_tp_link_lops = { .release = bpf_raw_tp_link_release, - .dealloc = bpf_raw_tp_link_dealloc, + .dealloc_deferred = bpf_raw_tp_link_dealloc, .show_fdinfo = bpf_raw_tp_link_show_fdinfo, .fill_link_info = bpf_raw_tp_link_fill_link_info, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 63749ad5ac6b..98188379d5c7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5682,6 +5682,13 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) return reg->type == PTR_TO_FLOW_KEYS; } +static bool is_arena_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = reg_state(env, regno); + + return reg->type == PTR_TO_ARENA; +} + static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { #ifdef CONFIG_NET [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], @@ -6694,6 +6701,11 @@ static int check_stack_access_within_bounds( err = check_stack_slot_within_bounds(env, min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ + if (!err && access_size < 0) + /* access_size should not be negative (or overflow an int); others checks + * along the way should have prevented such an access. + */ + err = -EFAULT; /* invalid negative access size; integer overflow? */ if (err) { if (tnum_is_const(reg->var_off)) { @@ -7019,7 +7031,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) || - is_sk_reg(env, insn->dst_reg)) { + is_sk_reg(env, insn->dst_reg) || + is_arena_reg(env, insn->dst_reg)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str(env, reg_state(env, insn->dst_reg)->type)); @@ -14014,6 +14027,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n"); return -EINVAL; } + if (!env->prog->aux->arena) { + verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n"); + return -EINVAL; + } } else { if ((insn->off != 0 && insn->off != 8 && insn->off != 16 && insn->off != 32) || insn->imm) { @@ -14046,8 +14063,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->imm) { /* off == BPF_ADDR_SPACE_CAST */ mark_reg_unknown(env, regs, insn->dst_reg); - if (insn->imm == 1) /* cast from as(1) to as(0) */ + if (insn->imm == 1) { /* cast from as(1) to as(0) */ dst_reg->type = PTR_TO_ARENA; + /* PTR_TO_ARENA is 32-bit */ + dst_reg->subreg_def = env->insn_idx + 1; + } } else if (insn->off == 0) { /* case: R1 = R2 * copy register state to dest reg @@ -18359,15 +18379,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) } if (!env->prog->jit_requested) { verbose(env, "JIT is required to use arena\n"); + fdput(f); return -EOPNOTSUPP; } if (!bpf_jit_supports_arena()) { verbose(env, "JIT doesn't support arena\n"); + fdput(f); return -EOPNOTSUPP; } env->prog->aux->arena = (void *)map; if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { verbose(env, "arena's user address must be set via map_extra or mmap()\n"); + fdput(f); return -EINVAL; } } @@ -19601,8 +19624,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) { /* convert to 32-bit mov that clears upper 32-bit */ insn->code = BPF_ALU | BPF_MOV | BPF_X; - /* clear off, so it's a normal 'wX = wY' from JIT pov */ + /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */ insn->off = 0; + insn->imm = 0; } /* cast from as(0) to as(1) should be handled by JIT */ goto next_insn; } diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config index 7a5bbfc024b7..4b4cfcba3190 100644 --- a/kernel/configs/hardening.config +++ b/kernel/configs/hardening.config @@ -39,11 +39,12 @@ CONFIG_UBSAN=y CONFIG_UBSAN_TRAP=y CONFIG_UBSAN_BOUNDS=y # CONFIG_UBSAN_SHIFT is not set -# CONFIG_UBSAN_DIV_ZERO -# CONFIG_UBSAN_UNREACHABLE -# CONFIG_UBSAN_BOOL -# CONFIG_UBSAN_ENUM -# CONFIG_UBSAN_ALIGNMENT +# CONFIG_UBSAN_DIV_ZERO is not set +# CONFIG_UBSAN_UNREACHABLE is not set +# CONFIG_UBSAN_SIGNED_WRAP is not set +# CONFIG_UBSAN_BOOL is not set +# CONFIG_UBSAN_ENUM is not set +# CONFIG_UBSAN_ALIGNMENT is not set # Sampling-based heap out-of-bounds and use-after-free detection. CONFIG_KFENCE=y diff --git a/kernel/cpu.c b/kernel/cpu.c index 8f6affd051f7..07ad53b7f119 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3207,7 +3207,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - CPU_MITIGATIONS_AUTO; + IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index bbb6c3cb00e4..066668799f75 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -366,7 +366,9 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) crashk_low_res.start = low_base; crashk_low_res.end = low_base + low_size - 1; +#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY insert_resource(&iomem_resource, &crashk_low_res); +#endif #endif return 0; } @@ -448,8 +450,12 @@ retry: crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; +#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + insert_resource(&iomem_resource, &crashk_res); +#endif } +#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY static __init int insert_crashkernel_resources(void) { if (crashk_res.start < crashk_res.end) @@ -462,3 +468,4 @@ static __init int insert_crashkernel_resources(void) } early_initcall(insert_crashkernel_resources); #endif +#endif diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 86fe172b5958..a5e0dfc44d24 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -69,11 +69,14 @@ * @alloc_size: Size of the allocated buffer. * @list: The free list describing the number of free entries available * from each index. + * @pad_slots: Number of preceding padding slots. Valid only in the first + * allocated non-padding slot. */ struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; - unsigned int list; + unsigned short list; + unsigned short pad_slots; }; static bool swiotlb_force_bounce; @@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } memset(vaddr, 0, bytes); @@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev) #endif } -/* - * Return the offset into a iotlb slot required to keep the device happy. +/** + * swiotlb_align_offset() - Get required offset into an IO TLB allocation. + * @dev: Owning device. + * @align_mask: Allocation alignment mask. + * @addr: DMA address. + * + * Return the minimum offset from the start of an IO TLB allocation which is + * required for a given buffer address and allocation alignment to keep the + * device happy. + * + * First, the address bits covered by min_align_mask must be identical in the + * original address and the bounce buffer address. High bits are preserved by + * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra + * padding bytes before the bounce buffer. + * + * Second, @align_mask specifies which bits of the first allocated slot must + * be zero. This may require allocating additional padding slots, and then the + * offset (in bytes) from the first such padding slot is returned. */ -static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) +static unsigned int swiotlb_align_offset(struct device *dev, + unsigned int align_mask, u64 addr) { - return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); + return addr & dma_get_min_align_mask(dev) & + (align_mask | (IO_TLB_SIZE - 1)); } /* @@ -841,27 +863,23 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; - unsigned int tlb_offset, orig_addr_offset; + int tlb_offset; if (orig_addr == INVALID_PHYS_ADDR) return; - tlb_offset = tlb_addr & (IO_TLB_SIZE - 1); - orig_addr_offset = swiotlb_align_offset(dev, orig_addr); - if (tlb_offset < orig_addr_offset) { - dev_WARN_ONCE(dev, 1, - "Access before mapping start detected. orig offset %u, requested offset %u.\n", - orig_addr_offset, tlb_offset); - return; - } - - tlb_offset -= orig_addr_offset; - if (tlb_offset > alloc_size) { - dev_WARN_ONCE(dev, 1, - "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n", - alloc_size, size, tlb_offset); - return; - } + /* + * It's valid for tlb_offset to be negative. This can happen when the + * "offset" returned by swiotlb_align_offset() is non-zero, and the + * tlb_addr is pointing within the first "offset" bytes of the second + * or subsequent slots of the allocated swiotlb area. While it's not + * valid for tlb_addr to be pointing within the first "offset" bytes + * of the first slot, there's no way to check for such an error since + * this function can't distinguish the first slot from the second and + * subsequent slots. + */ + tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - + swiotlb_align_offset(dev, 0, orig_addr); orig_addr += tlb_offset; alloc_size -= tlb_offset; @@ -1005,7 +1023,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool unsigned long max_slots = get_max_slots(boundary_mask); unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; @@ -1328,11 +1346,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int offset; struct io_tlb_pool *pool; unsigned int i; int index; phys_addr_t tlb_addr; + unsigned short pad_slots; if (!mem || !mem->nslabs) { dev_warn_ratelimited(dev, @@ -1349,6 +1368,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } + offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset, alloc_align_mask, &pool); if (index == -1) { @@ -1364,6 +1384,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ + pad_slots = offset >> IO_TLB_SHIFT; + offset &= (IO_TLB_SIZE - 1); + index += pad_slots; + pool->slots[index].pad_slots = pad_slots; for (i = 0; i < nr_slots(alloc_size + offset); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; @@ -1384,13 +1408,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) { struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; - unsigned int offset = swiotlb_align_offset(dev, tlb_addr); - int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; - int nslots = nr_slots(mem->slots[index].alloc_size + offset); - int aindex = index / mem->area_nslabs; - struct io_tlb_area *area = &mem->areas[aindex]; + unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); + int index, nslots, aindex; + struct io_tlb_area *area; int count, i; + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; + index -= mem->slots[index].pad_slots; + nslots = nr_slots(mem->slots[index].alloc_size + offset); + aindex = index / mem->area_nslabs; + area = &mem->areas[aindex]; + /* * Return the buffer to the free list by setting the corresponding * entries to indicate the number of contiguous entries available. @@ -1413,6 +1441,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) mem->slots[i].list = ++count; mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } /* @@ -1647,9 +1676,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get, static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { - atomic_long_set(&mem->total_used, 0); - atomic_long_set(&mem->used_hiwater, 0); - mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); if (!mem->nslabs) return; @@ -1660,7 +1686,6 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, &fops_io_tlb_hiwater); #ifdef CONFIG_SWIOTLB_DYNAMIC - atomic_long_set(&mem->transient_nslabs, 0); debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, mem, &fops_io_tlb_transient_used); #endif diff --git a/kernel/fork.c b/kernel/fork.c index 39a5046c2f0b..aebb3e6c96dc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, } else if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; vm_flags_clear(tmp, VM_LOCKED_MASK); + /* + * Copy/update hugetlb private vma information. + */ + if (is_vm_hugetlb_page(tmp)) + hugetlb_dup_vma_private(tmp); + + /* + * Link the vma into the MT. After using __mt_dup(), memory + * allocation is not necessary here, so it cannot fail. + */ + vma_iter_bulk_store(&vmi, tmp); + + mm->map_count++; + + if (tmp->vm_ops && tmp->vm_ops->open) + tmp->vm_ops->open(tmp); + file = tmp->vm_file; if (file) { struct address_space *mapping = file->f_mapping; @@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, i_mmap_unlock_write(mapping); } - /* - * Copy/update hugetlb private vma information. - */ - if (is_vm_hugetlb_page(tmp)) - hugetlb_dup_vma_private(tmp); - - /* - * Link the vma into the MT. After using __mt_dup(), memory - * allocation is not necessary here, so it cannot fail. - */ - vma_iter_bulk_store(&vmi, tmp); - - mm->map_count++; if (!(tmp->vm_flags & VM_WIPEONFORK)) retval = copy_page_range(tmp, mpnt); - if (tmp->vm_ops && tmp->vm_ops->open) - tmp->vm_ops->open(tmp); - if (retval) { mpnt = vma_next(&vmi); goto loop_out; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ad3eaf2ab959..bf9ae8a8686f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1643,8 +1643,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!((old->flags & new->flags) & IRQF_SHARED) || - (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if ((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9d9095e81792..65adc815fc6e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p, jump_label_lock(); preempt_disable(); - /* Ensure it is not in reserved area nor out of text */ - if (!(core_kernel_text((unsigned long) p->addr) || - is_module_text_address((unsigned long) p->addr)) || - in_gate_area_no_mm((unsigned long) p->addr) || + /* Ensure the address is in a text area, and find a module if exists. */ + *probed_mod = NULL; + if (!core_kernel_text((unsigned long) p->addr)) { + *probed_mod = __module_text_address((unsigned long) p->addr); + if (!(*probed_mod)) { + ret = -EINVAL; + goto out; + } + } + /* Ensure it is not in reserved area. */ + if (in_gate_area_no_mm((unsigned long) p->addr) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || static_call_text_reserved(p->addr, p->addr) || @@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p, goto out; } - /* Check if 'p' is probing a module. */ - *probed_mod = __module_text_address((unsigned long) p->addr); + /* Get module refcount and reject __init functions for loaded modules. */ if (*probed_mod) { /* * We must hold a refcount of the probed module while updating diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index c3ced519e14b..f3e0329337f6 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -236,6 +236,10 @@ choice possible to load a signed module containing the algorithm to check the signature on that module. +config MODULE_SIG_SHA1 + bool "Sign modules with SHA-1" + select CRYPTO_SHA1 + config MODULE_SIG_SHA256 bool "Sign modules with SHA-256" select CRYPTO_SHA256 @@ -265,6 +269,7 @@ endchoice config MODULE_SIG_HASH string depends on MODULE_SIG || IMA_APPRAISE_MODSIG + default "sha1" if MODULE_SIG_SHA1 default "sha256" if MODULE_SIG_SHA256 default "sha384" if MODULE_SIG_SHA384 default "sha512" if MODULE_SIG_SHA512 diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index e3ae93bbcb9b..09f8397bae15 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -106,6 +106,12 @@ static void s2idle_enter(void) swait_event_exclusive(s2idle_wait_head, s2idle_state == S2IDLE_STATE_WAKE); + /* + * Kick all CPUs to ensure that they resume their timers and restore + * consistent system state. + */ + wake_up_all_idle_cpus(); + cpus_read_unlock(); raw_spin_lock_irq(&s2idle_lock); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ca5146006b94..adf99c05adca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2009,6 +2009,12 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. + */ + console_may_schedule = 0; + return 1; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d2242679239e..ae50f212775e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -79,6 +79,8 @@ # include #endif +#include + #include "cpupri.h" #include "cpudeadline.h" @@ -3445,13 +3447,19 @@ static inline void switch_mm_cid(struct rq *rq, * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu]. * Provide it here. */ - if (!prev->mm) // from kernel + if (!prev->mm) { // from kernel smp_mb(); - /* - * user -> user transition guarantees a memory barrier through - * switch_mm() when current->mm changes. If current->mm is - * unchanged, no barrier is needed. - */ + } else { // from user + /* + * user->user transition relies on an implicit + * memory barrier in switch_mm() when + * current->mm changes. If the architecture + * switch_mm() does not have an implicit memory + * barrier, it is emitted here. If current->mm + * is unchanged, no barrier is needed. + */ + smp_mb__after_switch_mm(); + } } if (prev->mm_cid_active) { mm_cid_snapshot_time(rq, prev->mm); diff --git a/kernel/sys.c b/kernel/sys.c index f8e543f1e38a..8bb106a56b3a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2408,8 +2408,11 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; - /* PARISC cannot allow mdwe as it needs writable stacks */ - if (IS_ENABLED(CONFIG_PARISC)) + /* + * EOPNOTSUPP might be more appropriate here in principle, but + * existing userspace depends on EINVAL specifically. + */ + if (!arch_memory_deny_write_exec_supported()) return -EINVAL; current_bits = get_current_mdwe(); diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 9de66bbbb3d1..4782edcbe7b9 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -129,15 +129,17 @@ static int posix_clock_open(struct inode *inode, struct file *fp) goto out; } pccontext->clk = clk; - fp->private_data = pccontext; - if (clk->ops.open) + if (clk->ops.open) { err = clk->ops.open(pccontext, fp->f_mode); - else - err = 0; - - if (!err) { - get_device(clk->dev); + if (err) { + kfree(pccontext); + goto out; + } } + + fp->private_data = pccontext; + get_device(clk->dev); + err = 0; out: up_read(&clk->rwsem); return err; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index fb0fdec8719a..d88b13076b79 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -7,6 +7,7 @@ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ +#include #include #include #include @@ -84,7 +85,7 @@ int tick_is_oneshot_available(void) */ static void tick_periodic(int cpu) { - if (tick_do_timer_cpu == cpu) { + if (READ_ONCE(tick_do_timer_cpu) == cpu) { raw_spin_lock(&jiffies_lock); write_seqcount_begin(&jiffies_seq); @@ -215,8 +216,8 @@ static void tick_setup_device(struct tick_device *td, * If no cpu took the do_timer update, assign it to * this cpu: */ - if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - tick_do_timer_cpu = cpu; + if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) { + WRITE_ONCE(tick_do_timer_cpu, cpu); tick_next_period = ktime_get(); #ifdef CONFIG_NO_HZ_FULL /* @@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td, !tick_nohz_full_cpu(cpu)) { tick_take_do_timer_from_boot(); tick_do_timer_boot_cpu = -1; - WARN_ON(tick_do_timer_cpu != cpu); + WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu); #endif } @@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(void) int tick_cpu_dying(unsigned int dying_cpu) { /* - * If the current CPU is the timekeeper, it's the only one that - * can safely hand over its duty. Also all online CPUs are in - * stop machine, guaranteed not to be idle, therefore it's safe - * to pick any online successor. + * If the current CPU is the timekeeper, it's the only one that can + * safely hand over its duty. Also all online CPUs are in stop + * machine, guaranteed not to be idle, therefore there is no + * concurrency and it's safe to pick any online successor. */ if (tick_do_timer_cpu == dying_cpu) tick_do_timer_cpu = cpumask_first(cpu_online_mask); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 269e21590df5..71a792cd8936 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -8,6 +8,7 @@ * * Started by: Thomas Gleixner and Ingo Molnar */ +#include #include #include #include @@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts, static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { - int cpu = smp_processor_id(); + int tick_cpu, cpu = smp_processor_id(); /* * Check if the do_timer duty was dropped. We don't care about @@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) * If nohz_full is enabled, this should not happen because the * 'tick_do_timer_cpu' CPU never relinquishes. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && - unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { + tick_cpu = READ_ONCE(tick_do_timer_cpu); + + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL WARN_ON_ONCE(tick_nohz_full_running); #endif - tick_do_timer_cpu = cpu; + WRITE_ONCE(tick_do_timer_cpu, cpu); + tick_cpu = cpu; } /* Check if jiffies need an update */ - if (tick_do_timer_cpu == cpu) + if (tick_cpu == cpu) tick_do_update_jiffies64(now); /* @@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned int cpu) * timers, workqueues, timekeeping, ...) on behalf of full dynticks * CPUs. It must remain online when nohz full is enabled. */ - if (tick_nohz_full_running && tick_do_timer_cpu == cpu) + if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu) return false; return true; } @@ -697,6 +700,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu) /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted + * @now: current ktime_t * * Called from interrupt entry when the CPU was idle * @@ -794,7 +798,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime, * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * - * This function returns -1 if NOHZ is not enabled. + * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu */ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { @@ -820,7 +824,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * - * This function returns -1 if NOHZ is not enabled. + * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu */ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { @@ -890,6 +894,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { u64 basemono, next_tick, delta, expires; unsigned long basejiff; + int tick_cpu; basemono = get_jiffies_update(&basejiff); ts->last_jiffies = basejiff; @@ -946,9 +951,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * Otherwise we can sleep as long as we want. */ delta = timekeeping_max_deferment(); - if (cpu != tick_do_timer_cpu && - (tick_do_timer_cpu != TICK_DO_TIMER_NONE || - !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST))) + tick_cpu = READ_ONCE(tick_do_timer_cpu); + if (tick_cpu != cpu && + (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST))) delta = KTIME_MAX; /* Calculate the next expiry time */ @@ -969,6 +974,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) unsigned long basejiff = ts->last_jiffies; u64 basemono = ts->timer_expires_base; bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED); + int tick_cpu; u64 expires; /* Make sure we won't be trying to stop it twice in a row. */ @@ -1006,10 +1012,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) * do_timer() never gets invoked. Keep track of the fact that it * was the one which had the do_timer() duty last. */ - if (cpu == tick_do_timer_cpu) { - tick_do_timer_cpu = TICK_DO_TIMER_NONE; + tick_cpu = READ_ONCE(tick_do_timer_cpu); + if (tick_cpu == cpu) { + WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE); tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST); - } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { + } else if (tick_cpu != TICK_DO_TIMER_NONE) { tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST); } @@ -1172,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; if (tick_nohz_full_enabled()) { + int tick_cpu = READ_ONCE(tick_do_timer_cpu); + /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around */ - if (tick_do_timer_cpu == cpu) + if (tick_cpu == cpu) return false; /* Should not happen for nohz-full */ - if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) + if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE)) return false; } @@ -1287,6 +1296,8 @@ void tick_nohz_irq_exit(void) /** * tick_nohz_idle_got_tick - Check whether or not the tick handler has run + * + * Return: %true if the tick handler has run, otherwise %false */ bool tick_nohz_idle_got_tick(void) { @@ -1305,6 +1316,8 @@ bool tick_nohz_idle_got_tick(void) * stopped, it returns the next hrtimer. * * Called from power state control code with interrupts disabled + * + * Return: the next expiration time */ ktime_t tick_nohz_get_next_hrtimer(void) { @@ -1320,6 +1333,8 @@ ktime_t tick_nohz_get_next_hrtimer(void) * The return value of this function and/or the value returned by it through the * @delta_next pointer can be negative which must be taken into account by its * callers. + * + * Return: the expected length of the current sleep */ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { @@ -1357,8 +1372,11 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) /** * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value * for a particular CPU. + * @cpu: target CPU number * * Called from the schedutil frequency scaling governor in scheduler context. + * + * Return: the current idle calls counter value for @cpu */ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) { @@ -1371,6 +1389,8 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) * tick_nohz_get_idle_calls - return the current idle calls counter value * * Called from the schedutil frequency scaling governor in scheduler context. + * + * Return: the current idle calls counter value for the current CPU */ unsigned long tick_nohz_get_idle_calls(void) { @@ -1559,7 +1579,7 @@ early_param("skew_tick", skew_tick); /** * tick_setup_sched_timer - setup the tick emulation timer - * @mode: tick_nohz_mode to setup for + * @hrtimer: whether to use the hrtimer or not */ void tick_setup_sched_timer(bool hrtimer) { diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index e11c4dc65bcb..b4a7822f495d 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -46,8 +46,8 @@ struct tick_device { * @next_tick: Next tick to be fired when in dynticks mode. * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_waketime: Time when the idle was interrupted + * @idle_sleeptime_seq: sequence counter for data consistency * @idle_entrytime: Time when the idle call was entered - * @nohz_mode: Mode - one state of tick_nohz_mode * @last_jiffies: Base jiffies snapshot when next event was last computed * @timer_expires_base: Base time clock monotonic for @timer_expires * @timer_expires: Anticipated timer expiration time (in case sched tick is stopped) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index dee29f1f5b75..3baf2fbe6848 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -64,15 +64,15 @@ EXPORT_SYMBOL(jiffies_64); /* * The timer wheel has LVL_DEPTH array levels. Each level provides an array of - * LVL_SIZE buckets. Each level is driven by its own clock and therefor each + * LVL_SIZE buckets. Each level is driven by its own clock and therefore each * level has a different granularity. * - * The level granularity is: LVL_CLK_DIV ^ lvl + * The level granularity is: LVL_CLK_DIV ^ level * The level clock frequency is: HZ / (LVL_CLK_DIV ^ level) * * The array level of a newly armed timer depends on the relative expiry * time. The farther the expiry time is away the higher the array level and - * therefor the granularity becomes. + * therefore the granularity becomes. * * Contrary to the original timer wheel implementation, which aims for 'exact' * expiry of the timers, this implementation removes the need for recascading @@ -207,7 +207,7 @@ EXPORT_SYMBOL(jiffies_64); * struct timer_base - Per CPU timer base (number of base depends on config) * @lock: Lock protecting the timer_base * @running_timer: When expiring timers, the lock is dropped. To make - * sure not to race agains deleting/modifying a + * sure not to race against deleting/modifying a * currently running timer, the pointer is set to the * timer, which expires at the moment. If no timer is * running, the pointer is NULL. @@ -737,7 +737,7 @@ static bool timer_is_static_object(void *addr) } /* - * fixup_init is called when: + * timer_fixup_init is called when: * - an active object is initialized */ static bool timer_fixup_init(void *addr, enum debug_obj_state state) @@ -761,7 +761,7 @@ static void stub_timer(struct timer_list *unused) } /* - * fixup_activate is called when: + * timer_fixup_activate is called when: * - an active object is activated * - an unknown non-static object is activated */ @@ -783,7 +783,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state) } /* - * fixup_free is called when: + * timer_fixup_free is called when: * - an active object is freed */ static bool timer_fixup_free(void *addr, enum debug_obj_state state) @@ -801,7 +801,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state) } /* - * fixup_assert_init is called when: + * timer_fixup_assert_init is called when: * - an untracked/uninit-ed object is found */ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) @@ -914,7 +914,7 @@ static void do_init_timer(struct timer_list *timer, * @key: lockdep class key of the fake lock used for tracking timer * sync lock dependencies * - * init_timer_key() must be done to a timer prior calling *any* of the + * init_timer_key() must be done to a timer prior to calling *any* of the * other timer functions. */ void init_timer_key(struct timer_list *timer, @@ -1417,7 +1417,7 @@ static int __timer_delete(struct timer_list *timer, bool shutdown) * If @shutdown is set then the lock has to be taken whether the * timer is pending or not to protect against a concurrent rearm * which might hit between the lockless pending check and the lock - * aquisition. By taking the lock it is ensured that such a newly + * acquisition. By taking the lock it is ensured that such a newly * enqueued timer is dequeued and cannot end up with * timer->function == NULL in the expiry code. * @@ -2306,7 +2306,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem, /* * When timer base is not set idle, undo the effect of - * tmigr_cpu_deactivate() to prevent inconsitent states - active + * tmigr_cpu_deactivate() to prevent inconsistent states - active * timer base but inactive timer migration hierarchy. * * When timer base was already marked idle, nothing will be diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index c63a0afdcebe..ccba875d2234 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -751,6 +751,33 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, first_childevt = evt = data->evt; + /* + * Walking the hierarchy is required in any case when a + * remote expiry was done before. This ensures to not lose + * already queued events in non active groups (see section + * "Required event and timerqueue update after a remote + * expiry" in the documentation at the top). + * + * The two call sites which are executed without a remote expiry + * before, are not prevented from propagating changes through + * the hierarchy by the return: + * - When entering this path by tmigr_new_timer(), @evt->ignore + * is never set. + * - tmigr_inactive_up() takes care of the propagation by + * itself and ignores the return value. But an immediate + * return is possible if there is a parent, sparing group + * locking at this level, because the upper walking call to + * the parent will take care about removing this event from + * within the group and update next_expiry accordingly. + * + * However if there is no parent, ie: the hierarchy has only a + * single level so @group is the top level group, make sure the + * first event information of the group is updated properly and + * also handled properly, so skip this fast return path. + */ + if (evt->ignore && !remote && group->parent) + return true; + raw_spin_lock(&group->lock); childstate.state = 0; @@ -762,8 +789,11 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, * queue when the expiry time changed only or when it could be ignored. */ if (timerqueue_node_queued(&evt->nextevt)) { - if ((evt->nextevt.expires == nextexp) && !evt->ignore) + if ((evt->nextevt.expires == nextexp) && !evt->ignore) { + /* Make sure not to miss a new CPU event with the same expiry */ + evt->cpu = first_childevt->cpu; goto check_toplvl; + } if (!timerqueue_del(&group->events, &evt->nextevt)) WRITE_ONCE(group->next_expiry, KTIME_MAX); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 61c541c36596..47345bf1d4a9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -965,7 +965,7 @@ config FTRACE_RECORD_RECURSION config FTRACE_RECORD_RECURSION_SIZE int "Max number of recursed functions to record" - default 128 + default 128 depends on FTRACE_RECORD_RECURSION help This defines the limit of number of functions that can be diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0a5c4efc73c3..9dc605f08a23 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { .release = bpf_kprobe_multi_link_release, - .dealloc = bpf_kprobe_multi_link_dealloc, + .dealloc_deferred = bpf_kprobe_multi_link_dealloc, .fill_link_info = bpf_kprobe_multi_link_fill_link_info, }; @@ -3157,6 +3157,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link) umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); + if (umulti_link->task) + put_task_struct(umulti_link->task); + path_put(&umulti_link->path); } static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) @@ -3164,9 +3167,6 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) struct bpf_uprobe_multi_link *umulti_link; umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); - if (umulti_link->task) - put_task_struct(umulti_link->task); - path_put(&umulti_link->path); kvfree(umulti_link->uprobes); kfree(umulti_link); } @@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { .release = bpf_uprobe_multi_link_release, - .dealloc = bpf_uprobe_multi_link_dealloc, + .dealloc_deferred = bpf_uprobe_multi_link_dealloc, .fill_link_info = bpf_uprobe_multi_link_fill_link_info, }; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 25476ead681b..6511dc3a00da 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1393,7 +1393,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); - local_inc(&cpu_buffer->pages_touched); /* * Just make sure we have seen our old_write and synchronize * with any interrupts that come in. @@ -1430,8 +1429,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, */ local_set(&next_page->page->commit, 0); - /* Again, either we update tail_page or an interrupt does */ - (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page); + /* Either we update tail_page or an interrupt does */ + if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page)) + local_inc(&cpu_buffer->pages_touched); } } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7c364b87352e..52f75c36bbca 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1670,6 +1670,7 @@ static int trace_format_open(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PERF_EVENTS static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -1684,6 +1685,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } +#endif static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, @@ -2152,10 +2154,12 @@ static const struct file_operations ftrace_event_format_fops = { .release = seq_release, }; +#ifdef CONFIG_PERF_EVENTS static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, .llseek = default_llseek, }; +#endif static const struct file_operations ftrace_event_filter_fops = { .open = tracing_open_file_tr, diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 217169de0920..dfe3ee6035ec 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -839,7 +839,7 @@ out: void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs) { struct probe_entry_arg *earg = tp->entry_arg; - unsigned long val; + unsigned long val = 0; int i; if (!earg) diff --git a/lib/bootconfig.c b/lib/bootconfig.c index c59d26068a64..97f8911ea339 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -61,9 +61,12 @@ static inline void * __init xbc_alloc_mem(size_t size) return memblock_alloc(size, SMP_CACHE_BYTES); } -static inline void __init xbc_free_mem(void *addr, size_t size) +static inline void __init xbc_free_mem(void *addr, size_t size, bool early) { - memblock_free(addr, size); + if (early) + memblock_free(addr, size); + else if (addr) + memblock_free_late(__pa(addr), size); } #else /* !__KERNEL__ */ @@ -73,7 +76,7 @@ static inline void *xbc_alloc_mem(size_t size) return malloc(size); } -static inline void xbc_free_mem(void *addr, size_t size) +static inline void xbc_free_mem(void *addr, size_t size, bool early) { free(addr); } @@ -898,19 +901,20 @@ static int __init xbc_parse_tree(void) } /** - * xbc_exit() - Clean up all parsed bootconfig + * _xbc_exit() - Clean up all parsed bootconfig + * @early: Set true if this is called before budy system is initialized. * * This clears all data structures of parsed bootconfig on memory. * If you need to reuse xbc_init() with new boot config, you can * use this. */ -void __init xbc_exit(void) +void __init _xbc_exit(bool early) { - xbc_free_mem(xbc_data, xbc_data_size); + xbc_free_mem(xbc_data, xbc_data_size, early); xbc_data = NULL; xbc_data_size = 0; xbc_node_num = 0; - xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); + xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX, early); xbc_nodes = NULL; brace_index = 0; } @@ -963,7 +967,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) if (!xbc_nodes) { if (emsg) *emsg = "Failed to allocate bootconfig nodes"; - xbc_exit(); + _xbc_exit(true); return -ENOMEM; } memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX); @@ -977,7 +981,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) *epos = xbc_err_pos; if (emsg) *emsg = xbc_err_msg; - xbc_exit(); + _xbc_exit(true); } else ret = xbc_node_num; diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c index bf70850035c7..404dba36bae3 100644 --- a/lib/checksum_kunit.c +++ b/lib/checksum_kunit.c @@ -594,13 +594,15 @@ static void test_ip_fast_csum(struct kunit *test) static void test_csum_ipv6_magic(struct kunit *test) { -#if defined(CONFIG_NET) const struct in6_addr *saddr; const struct in6_addr *daddr; unsigned int len; unsigned char proto; __wsum csum; + if (!IS_ENABLED(CONFIG_NET)) + return; + const int daddr_offset = sizeof(struct in6_addr); const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr); const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) + @@ -618,7 +620,6 @@ static void test_csum_ipv6_magic(struct kunit *test) CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]), csum_ipv6_magic(saddr, daddr, len, proto, csum)); } -#endif /* !CONFIG_NET */ } static struct kunit_case __refdata checksum_test_cases[] = { diff --git a/lib/stackdepot.c b/lib/stackdepot.c index af6cc19a2003..68c97387aa54 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -330,7 +330,7 @@ static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) stack = current_pool + pool_offset; /* Pre-initialize handle once. */ - stack->handle.pool_index = pool_index + 1; + stack->handle.pool_index_plus_1 = pool_index + 1; stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; INIT_LIST_HEAD(&stack->hash_list); @@ -441,7 +441,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; - u32 pool_index = parts.pool_index - 1; + u32 pool_index = parts.pool_index_plus_1 - 1; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 276c12140ee2..c288df9372ed 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -134,7 +134,7 @@ static const test_ubsan_fp test_ubsan_array[] = { }; /* Excluded because they Oops the module. */ -static const test_ubsan_fp skip_ubsan_array[] = { +static __used const test_ubsan_fp skip_ubsan_array[] = { test_ubsan_divrem_overflow, }; diff --git a/lib/ubsan.c b/lib/ubsan.c index 5fc107f61934..a1c983d148f1 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -44,9 +44,10 @@ const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) case ubsan_shift_out_of_bounds: return "UBSAN: shift out of bounds"; #endif -#ifdef CONFIG_UBSAN_DIV_ZERO +#if defined(CONFIG_UBSAN_DIV_ZERO) || defined(CONFIG_UBSAN_SIGNED_WRAP) /* - * SanitizerKind::IntegerDivideByZero emits + * SanitizerKind::IntegerDivideByZero and + * SanitizerKind::SignedIntegerOverflow emit * SanitizerHandler::DivremOverflow. */ case ubsan_divrem_overflow: @@ -77,6 +78,19 @@ const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) return "UBSAN: alignment assumption"; case ubsan_type_mismatch: return "UBSAN: type mismatch"; +#endif +#ifdef CONFIG_UBSAN_SIGNED_WRAP + /* + * SanitizerKind::SignedIntegerOverflow emits + * SanitizerHandler::AddOverflow, SanitizerHandler::SubOverflow, + * or SanitizerHandler::MulOverflow. + */ + case ubsan_add_overflow: + return "UBSAN: integer addition overflow"; + case ubsan_sub_overflow: + return "UBSAN: integer subtraction overflow"; + case ubsan_mul_overflow: + return "UBSAN: integer multiplication overflow"; #endif default: return "UBSAN: unrecognized failure code"; diff --git a/mm/Makefile b/mm/Makefile index e4b5b75aaec9..4abb40b911ec 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -29,8 +29,7 @@ KCOV_INSTRUMENT_mmzone.o := n KCOV_INSTRUMENT_vmstat.o := n KCOV_INSTRUMENT_failslab.o := n -CFLAGS_init-mm.o += $(call cc-disable-warning, override-init) -CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides) +CFLAGS_init-mm.o += -Wno-override-init mmu-y := nommu.o mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \ diff --git a/mm/filemap.c b/mm/filemap.c index 7437b2bd75c1..30de18c4fd28 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4197,7 +4197,23 @@ static void filemap_cachestat(struct address_space *mapping, /* shmem file - in swap cache */ swp_entry_t swp = radix_to_swp_entry(folio); + /* swapin error results in poisoned entry */ + if (non_swap_entry(swp)) + goto resched; + + /* + * Getting a swap entry from the shmem + * inode means we beat + * shmem_unuse(). rcu_read_lock() + * ensures swapoff waits for us before + * freeing the swapper space. However, + * we can race with swapping and + * invalidation, so there might not be + * a shadow in the swapcache (yet). + */ shadow = get_shadow_from_swap_cache(swp); + if (!shadow) + goto resched; } #endif if (workingset_test_recent(shadow, true, &workingset)) diff --git a/mm/gup.c b/mm/gup.c index df83182ec72d..1611e73b1121 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm, /* first iteration or cross vma bound */ if (!vma || start >= vma->vm_end) { + /* + * MADV_POPULATE_(READ|WRITE) wants to handle VMA + * lookups+error reporting differently. + */ + if (gup_flags & FOLL_MADV_POPULATE) { + vma = vma_lookup(mm, start); + if (!vma) { + ret = -ENOMEM; + goto out; + } + if (check_vma_flags(vma, gup_flags)) { + ret = -EINVAL; + goto out; + } + goto retry; + } vma = gup_vma_lookup(mm, start); if (!vma && in_gate_area(mm, start)) { ret = get_gate_page(mm, start & PAGE_MASK, @@ -1653,20 +1669,22 @@ long populate_vma_page_range(struct vm_area_struct *vma, if (vma->vm_flags & VM_LOCKONFAULT) return nr_pages; + /* ... similarly, we've never faulted in PROT_NONE pages */ + if (!vma_is_accessible(vma)) + return -EFAULT; + gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW * and we would not want to dirty them for nothing. + * + * Otherwise, do a read fault, and use FOLL_FORCE in case it's not + * readable (ie write-only or executable). */ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) gup_flags |= FOLL_WRITE; - - /* - * We want mlock to succeed for regions that have any permissions - * other than PROT_NONE. - */ - if (vma_is_accessible(vma)) + else gup_flags |= FOLL_FORCE; if (locked) @@ -1683,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma, } /* - * faultin_vma_page_range() - populate (prefault) page tables inside the - * given VMA range readable/writable + * faultin_page_range() - populate (prefault) page tables inside the + * given range readable/writable * * This takes care of mlocking the pages, too, if VM_LOCKED is set. * - * @vma: target vma + * @mm: the mm to populate page tables in * @start: start address * @end: end address * @write: whether to prefault readable or writable * @locked: whether the mmap_lock is still held * - * Returns either number of processed pages in the vma, or a negative error - * code on error (see __get_user_pages()). + * Returns either number of processed pages in the MM, or a negative error + * code on error (see __get_user_pages()). Note that this function reports + * errors related to VMAs, such as incompatible mappings, as expected by + * MADV_POPULATE_(READ|WRITE). * - * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and - * covered by the VMA. If it's released, *@locked will be set to 0. + * The range must be page-aligned. + * + * mm->mmap_lock must be held. If it's released, *@locked will be set to 0. */ -long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end, bool write, int *locked) +long faultin_page_range(struct mm_struct *mm, unsigned long start, + unsigned long end, bool write, int *locked) { - struct mm_struct *mm = vma->vm_mm; unsigned long nr_pages = (end - start) / PAGE_SIZE; int gup_flags; long ret; VM_BUG_ON(!PAGE_ALIGNED(start)); VM_BUG_ON(!PAGE_ALIGNED(end)); - VM_BUG_ON_VMA(start < vma->vm_start, vma); - VM_BUG_ON_VMA(end > vma->vm_end, vma); mmap_assert_locked(mm); /* @@ -1723,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start, * a poisoned page. * !FOLL_FORCE: Require proper access permissions. */ - gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE; + gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE | + FOLL_MADV_POPULATE; if (write) gup_flags |= FOLL_WRITE; - /* - * We want to report -EINVAL instead of -EFAULT for any permission - * problems or incompatible mappings. - */ - if (check_vma_flags(vma, gup_flags)) - return -EINVAL; - - ret = __get_user_pages(mm, start, nr_pages, gup_flags, - NULL, locked); + ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked, + gup_flags); lru_add_drain(); return ret; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9859aa4f7553..89f58c7603b2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2259,9 +2259,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm goto unlock_ptls; } - folio_move_anon_rmap(src_folio, dst_vma); - WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr)); - src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd); /* Folio got pinned from under us. Put it back and fail the move. */ if (folio_maybe_dma_pinned(src_folio)) { @@ -2270,6 +2267,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm goto unlock_ptls; } + folio_move_anon_rmap(src_folio, dst_vma); + WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr)); + _dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot); /* Follow mremap() behavior and treat the entry dirty after the move */ _dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 23ef240ba48a..31d00eee028f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7044,9 +7044,13 @@ long hugetlb_change_protection(struct vm_area_struct *vma, if (!pte_same(pte, newpte)) set_huge_pte_at(mm, address, ptep, newpte, psize); } else if (unlikely(is_pte_marker(pte))) { - /* No other markers apply for now. */ - WARN_ON_ONCE(!pte_marker_uffd_wp(pte)); - if (uffd_wp_resolve) + /* + * Do nothing on a poison marker; page is + * corrupted, permissons do not apply. Here + * pte_marker_uffd_wp()==true implies !poison + * because they're mutual exclusive. + */ + if (pte_marker_uffd_wp(pte) && uffd_wp_resolve) /* Safe to modify directly (non-present->none). */ huge_pte_clear(mm, address, ptep, psize); } else if (!huge_pte_none(pte)) { diff --git a/mm/internal.h b/mm/internal.h index 7e486f2c502c..07ad2675a88b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -686,9 +686,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio); void unmap_mapping_folio(struct folio *folio); extern long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked); -extern long faultin_vma_page_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - bool write, int *locked); +extern long faultin_page_range(struct mm_struct *mm, unsigned long start, + unsigned long end, bool write, int *locked); extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags, unsigned long bytes); @@ -1127,10 +1126,13 @@ enum { FOLL_FAST_ONLY = 1 << 20, /* allow unlocking the mmap lock */ FOLL_UNLOCKABLE = 1 << 21, + /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */ + FOLL_MADV_POPULATE = 1 << 22, }; #define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \ - FOLL_FAST_ONLY | FOLL_UNLOCKABLE) + FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \ + FOLL_MADV_POPULATE) /* * Indicates for which pages that are write-protected in the page table, diff --git a/mm/madvise.c b/mm/madvise.c index 44a498c94158..1a073fcc4c0c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -908,27 +908,14 @@ static long madvise_populate(struct vm_area_struct *vma, { const bool write = behavior == MADV_POPULATE_WRITE; struct mm_struct *mm = vma->vm_mm; - unsigned long tmp_end; int locked = 1; long pages; *prev = vma; while (start < end) { - /* - * We might have temporarily dropped the lock. For example, - * our VMA might have been split. - */ - if (!vma || start >= vma->vm_end) { - vma = vma_lookup(mm, start); - if (!vma) - return -ENOMEM; - } - - tmp_end = min_t(unsigned long, end, vma->vm_end); /* Populate (prefault) page tables readable/writable. */ - pages = faultin_vma_page_range(vma, start, tmp_end, write, - &locked); + pages = faultin_page_range(mm, start, end, write, &locked); if (!locked) { mmap_read_lock(mm); locked = 1; @@ -949,7 +936,7 @@ static long madvise_populate(struct vm_area_struct *vma, pr_warn_once("%s: unhandled return value: %ld\n", __func__, pages); fallthrough; - case -ENOMEM: + case -ENOMEM: /* No VMA or out of memory. */ return -ENOMEM; } } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 9349948f1abf..9e62a00b46dd 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -154,11 +154,23 @@ static int __page_handle_poison(struct page *page) { int ret; - zone_pcp_disable(page_zone(page)); + /* + * zone_pcp_disable() can't be used here. It will + * hold pcp_batch_high_lock and dissolve_free_huge_page() might hold + * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap + * optimization is enabled. This will break current lock dependency + * chain and leads to deadlock. + * Disabling pcp before dissolving the page was a deterministic + * approach because we made sure that those pages cannot end up in any + * PCP list. Draining PCP lists expels those pages to the buddy system, + * but nothing guarantees that those pages do not get back to a PCP + * queue if we need to refill those. + */ ret = dissolve_free_huge_page(page); - if (!ret) + if (!ret) { + drain_all_pages(page_zone(page)); ret = take_page_off_buddy(page); - zone_pcp_enable(page_zone(page)); + } return ret; } diff --git a/mm/memory.c b/mm/memory.c index f2bc6dd15eb8..d2155ced45f8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1536,7 +1536,9 @@ static inline int zap_present_ptes(struct mmu_gather *tlb, ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); arch_check_zapped_pte(vma, ptent); tlb_remove_tlb_entry(tlb, pte, addr); - VM_WARN_ON_ONCE(userfaultfd_wp(vma)); + if (userfaultfd_pte_wp(vma, ptent)) + zap_install_uffd_wp_if_needed(vma, addr, pte, 1, + details, ptent); ksm_might_unmap_zero_page(mm, ptent); return 1; } @@ -5971,6 +5973,10 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = ptep_get(ptep); + /* Never return PFNs of anon folios in COW mappings. */ + if (vm_normal_folio(vma, address, pte)) + goto unlock; + if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; diff --git a/mm/page_owner.c b/mm/page_owner.c index e7139952ffd9..742f432e5bf0 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -54,6 +54,22 @@ static depot_stack_handle_t early_handle; static void init_early_allocated_pages(void); +static inline void set_current_in_page_owner(void) +{ + /* + * Avoid recursion. + * + * We might need to allocate more memory from page_owner code, so make + * sure to signal it in order to avoid recursion. + */ + current->in_page_owner = 1; +} + +static inline void unset_current_in_page_owner(void) +{ + current->in_page_owner = 0; +} + static int __init early_page_owner_param(char *buf) { int ret = kstrtobool(buf, &page_owner_enabled); @@ -102,7 +118,6 @@ static __init void init_page_owner(void) register_dummy_stack(); register_failure_stack(); register_early_stack(); - static_branch_enable(&page_owner_inited); init_early_allocated_pages(); /* Initialize dummy and failure stacks and link them to stack_list */ dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle); @@ -113,6 +128,7 @@ static __init void init_page_owner(void) refcount_set(&failure_stack.stack_record->count, 1); dummy_stack.next = &failure_stack; stack_list = &dummy_stack; + static_branch_enable(&page_owner_inited); } struct page_ext_operations page_owner_ops = { @@ -133,23 +149,16 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags) depot_stack_handle_t handle; unsigned int nr_entries; - /* - * Avoid recursion. - * - * Sometimes page metadata allocation tracking requires more - * memory to be allocated: - * - when new stack trace is saved to stack depot - */ if (current->in_page_owner) return dummy_handle; - current->in_page_owner = 1; + set_current_in_page_owner(); nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2); handle = stack_depot_save(entries, nr_entries, flags); if (!handle) handle = failure_handle; + unset_current_in_page_owner(); - current->in_page_owner = 0; return handle; } @@ -164,9 +173,13 @@ static void add_stack_record_to_list(struct stack_record *stack_record, gfp_mask &= (GFP_ATOMIC | GFP_KERNEL); gfp_mask |= __GFP_NOWARN; + set_current_in_page_owner(); stack = kmalloc(sizeof(*stack), gfp_mask); - if (!stack) + if (!stack) { + unset_current_in_page_owner(); return; + } + unset_current_in_page_owner(); stack->stack_record = stack_record; stack->next = NULL; @@ -183,7 +196,8 @@ static void add_stack_record_to_list(struct stack_record *stack_record, spin_unlock_irqrestore(&stack_list_lock, flags); } -static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask) +static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask, + int nr_base_pages) { struct stack_record *stack_record = __stack_depot_get_stack_record(handle); @@ -204,20 +218,74 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask) /* Add the new stack_record to our list */ add_stack_record_to_list(stack_record, gfp_mask); } - refcount_inc(&stack_record->count); + refcount_add(nr_base_pages, &stack_record->count); } -static void dec_stack_record_count(depot_stack_handle_t handle) +static void dec_stack_record_count(depot_stack_handle_t handle, + int nr_base_pages) { struct stack_record *stack_record = __stack_depot_get_stack_record(handle); - if (stack_record) - refcount_dec(&stack_record->count); + if (!stack_record) + return; + + if (refcount_sub_and_test(nr_base_pages, &stack_record->count)) + pr_warn("%s: refcount went to 0 for %u handle\n", __func__, + handle); +} + +static inline void __update_page_owner_handle(struct page_ext *page_ext, + depot_stack_handle_t handle, + unsigned short order, + gfp_t gfp_mask, + short last_migrate_reason, u64 ts_nsec, + pid_t pid, pid_t tgid, char *comm) +{ + int i; + struct page_owner *page_owner; + + for (i = 0; i < (1 << order); i++) { + page_owner = get_page_owner(page_ext); + page_owner->handle = handle; + page_owner->order = order; + page_owner->gfp_mask = gfp_mask; + page_owner->last_migrate_reason = last_migrate_reason; + page_owner->pid = pid; + page_owner->tgid = tgid; + page_owner->ts_nsec = ts_nsec; + strscpy(page_owner->comm, comm, + sizeof(page_owner->comm)); + __set_bit(PAGE_EXT_OWNER, &page_ext->flags); + __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); + page_ext = page_ext_next(page_ext); + } +} + +static inline void __update_page_owner_free_handle(struct page_ext *page_ext, + depot_stack_handle_t handle, + unsigned short order, + pid_t pid, pid_t tgid, + u64 free_ts_nsec) +{ + int i; + struct page_owner *page_owner; + + for (i = 0; i < (1 << order); i++) { + page_owner = get_page_owner(page_ext); + /* Only __reset_page_owner() wants to clear the bit */ + if (handle) { + __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); + page_owner->free_handle = handle; + } + page_owner->free_ts_nsec = free_ts_nsec; + page_owner->free_pid = current->pid; + page_owner->free_tgid = current->tgid; + page_ext = page_ext_next(page_ext); + } } void __reset_page_owner(struct page *page, unsigned short order) { - int i; struct page_ext *page_ext; depot_stack_handle_t handle; depot_stack_handle_t alloc_handle; @@ -232,16 +300,10 @@ void __reset_page_owner(struct page *page, unsigned short order) alloc_handle = page_owner->handle; handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); - for (i = 0; i < (1 << order); i++) { - __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); - page_owner->free_handle = handle; - page_owner->free_ts_nsec = free_ts_nsec; - page_owner->free_pid = current->pid; - page_owner->free_tgid = current->tgid; - page_ext = page_ext_next(page_ext); - page_owner = get_page_owner(page_ext); - } + __update_page_owner_free_handle(page_ext, handle, order, current->pid, + current->tgid, free_ts_nsec); page_ext_put(page_ext); + if (alloc_handle != early_handle) /* * early_handle is being set as a handle for all those @@ -250,39 +312,14 @@ void __reset_page_owner(struct page *page, unsigned short order) * the machinery is not ready yet, we cannot decrement * their refcount either. */ - dec_stack_record_count(alloc_handle); -} - -static inline void __set_page_owner_handle(struct page_ext *page_ext, - depot_stack_handle_t handle, - unsigned short order, gfp_t gfp_mask) -{ - struct page_owner *page_owner; - int i; - u64 ts_nsec = local_clock(); - - for (i = 0; i < (1 << order); i++) { - page_owner = get_page_owner(page_ext); - page_owner->handle = handle; - page_owner->order = order; - page_owner->gfp_mask = gfp_mask; - page_owner->last_migrate_reason = -1; - page_owner->pid = current->pid; - page_owner->tgid = current->tgid; - page_owner->ts_nsec = ts_nsec; - strscpy(page_owner->comm, current->comm, - sizeof(page_owner->comm)); - __set_bit(PAGE_EXT_OWNER, &page_ext->flags); - __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); - - page_ext = page_ext_next(page_ext); - } + dec_stack_record_count(alloc_handle, 1 << order); } noinline void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask) { struct page_ext *page_ext; + u64 ts_nsec = local_clock(); depot_stack_handle_t handle; handle = save_stack(gfp_mask); @@ -290,9 +327,11 @@ noinline void __set_page_owner(struct page *page, unsigned short order, page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; - __set_page_owner_handle(page_ext, handle, order, gfp_mask); + __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1, + current->pid, current->tgid, ts_nsec, + current->comm); page_ext_put(page_ext); - inc_stack_record_count(handle, gfp_mask); + inc_stack_record_count(handle, gfp_mask, 1 << order); } void __set_page_owner_migrate_reason(struct page *page, int reason) @@ -327,9 +366,12 @@ void __split_page_owner(struct page *page, int old_order, int new_order) void __folio_copy_owner(struct folio *newfolio, struct folio *old) { + int i; struct page_ext *old_ext; struct page_ext *new_ext; - struct page_owner *old_page_owner, *new_page_owner; + struct page_owner *old_page_owner; + struct page_owner *new_page_owner; + depot_stack_handle_t migrate_handle; old_ext = page_ext_get(&old->page); if (unlikely(!old_ext)) @@ -343,30 +385,32 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old) old_page_owner = get_page_owner(old_ext); new_page_owner = get_page_owner(new_ext); - new_page_owner->order = old_page_owner->order; - new_page_owner->gfp_mask = old_page_owner->gfp_mask; - new_page_owner->last_migrate_reason = - old_page_owner->last_migrate_reason; - new_page_owner->handle = old_page_owner->handle; - new_page_owner->pid = old_page_owner->pid; - new_page_owner->tgid = old_page_owner->tgid; - new_page_owner->free_pid = old_page_owner->free_pid; - new_page_owner->free_tgid = old_page_owner->free_tgid; - new_page_owner->ts_nsec = old_page_owner->ts_nsec; - new_page_owner->free_ts_nsec = old_page_owner->ts_nsec; - strcpy(new_page_owner->comm, old_page_owner->comm); - + migrate_handle = new_page_owner->handle; + __update_page_owner_handle(new_ext, old_page_owner->handle, + old_page_owner->order, old_page_owner->gfp_mask, + old_page_owner->last_migrate_reason, + old_page_owner->ts_nsec, old_page_owner->pid, + old_page_owner->tgid, old_page_owner->comm); /* - * We don't clear the bit on the old folio as it's going to be freed - * after migration. Until then, the info can be useful in case of - * a bug, and the overall stats will be off a bit only temporarily. - * Also, migrate_misplaced_transhuge_page() can still fail the - * migration and then we want the old folio to retain the info. But - * in that case we also don't need to explicitly clear the info from - * the new page, which will be freed. + * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio + * will be freed after migration. Keep them until then as they may be + * useful. */ - __set_bit(PAGE_EXT_OWNER, &new_ext->flags); - __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags); + __update_page_owner_free_handle(new_ext, 0, old_page_owner->order, + old_page_owner->free_pid, + old_page_owner->free_tgid, + old_page_owner->free_ts_nsec); + /* + * We linked the original stack to the new folio, we need to do the same + * for the new one and the old folio otherwise there will be an imbalance + * when subtracting those pages from the stack. + */ + for (i = 0; i < (1 << new_page_owner->order); i++) { + old_page_owner->handle = migrate_handle; + old_ext = page_ext_next(old_ext); + old_page_owner = get_page_owner(old_ext); + } + page_ext_put(new_ext); page_ext_put(old_ext); } @@ -774,8 +818,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) goto ext_put_continue; /* Found early allocated page */ - __set_page_owner_handle(page_ext, early_handle, - 0, 0); + __update_page_owner_handle(page_ext, early_handle, 0, 0, + -1, local_clock(), current->pid, + current->tgid, current->comm); count++; ext_put_continue: page_ext_put(page_ext); @@ -827,13 +872,11 @@ static void *stack_start(struct seq_file *m, loff_t *ppos) * value of stack_list. */ stack = smp_load_acquire(&stack_list); + m->private = stack; } else { stack = m->private; - stack = stack->next; } - m->private = stack; - return stack; } @@ -848,11 +891,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos) return stack; } -static unsigned long page_owner_stack_threshold; +static unsigned long page_owner_pages_threshold; static int stack_print(struct seq_file *m, void *v) { - int i, stack_count; + int i, nr_base_pages; struct stack *stack = v; unsigned long *entries; unsigned long nr_entries; @@ -863,14 +906,14 @@ static int stack_print(struct seq_file *m, void *v) nr_entries = stack_record->size; entries = stack_record->entries; - stack_count = refcount_read(&stack_record->count) - 1; + nr_base_pages = refcount_read(&stack_record->count) - 1; - if (stack_count < 1 || stack_count < page_owner_stack_threshold) + if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold) return 0; for (i = 0; i < nr_entries; i++) seq_printf(m, " %pS\n", (void *)entries[i]); - seq_printf(m, "stack_count: %d\n\n", stack_count); + seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages); return 0; } @@ -900,13 +943,13 @@ static const struct file_operations page_owner_stack_operations = { static int page_owner_threshold_get(void *data, u64 *val) { - *val = READ_ONCE(page_owner_stack_threshold); + *val = READ_ONCE(page_owner_pages_threshold); return 0; } static int page_owner_threshold_set(void *data, u64 val) { - WRITE_ONCE(page_owner_stack_threshold, val); + WRITE_ONCE(page_owner_pages_threshold, val); return 0; } diff --git a/mm/shmem.c b/mm/shmem.c index 0aad0d9a621b..94ab99b6b574 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -748,12 +748,6 @@ static long shmem_unused_huge_count(struct super_block *sb, #define shmem_huge SHMEM_HUGE_DENY -bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags) -{ - return false; -} - static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shrink_control *sc, unsigned long nr_to_split) { diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c index 062d1c1097ae..ce514e700d2f 100644 --- a/mm/shmem_quota.c +++ b/mm/shmem_quota.c @@ -116,7 +116,7 @@ static int shmem_free_file_info(struct super_block *sb, int type) static int shmem_get_next_id(struct super_block *sb, struct kqid *qid) { struct mem_dqinfo *info = sb_dqinfo(sb, qid->type); - struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node *node; qid_t id = from_kqid(&init_user_ns, *qid); struct quota_info *dqopt = sb_dqopt(sb); struct quota_id *entry = NULL; @@ -126,6 +126,7 @@ static int shmem_get_next_id(struct super_block *sb, struct kqid *qid) return -ESRCH; down_read(&dqopt->dqio_sem); + node = ((struct rb_root *)info->dqi_priv)->rb_node; while (node) { entry = rb_entry(node, struct quota_id, node); @@ -165,7 +166,7 @@ out_unlock: static int shmem_acquire_dquot(struct dquot *dquot) { struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type); - struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node **n; struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info; struct rb_node *parent = NULL, *new_node = NULL; struct quota_id *new_entry, *entry; @@ -176,6 +177,8 @@ static int shmem_acquire_dquot(struct dquot *dquot) mutex_lock(&dquot->dq_lock); down_write(&dqopt->dqio_sem); + n = &((struct rb_root *)info->dqi_priv)->rb_node; + while (*n) { parent = *n; entry = rb_entry(parent, struct quota_id, node); @@ -264,7 +267,7 @@ static bool shmem_is_empty_dquot(struct dquot *dquot) static int shmem_release_dquot(struct dquot *dquot) { struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type); - struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node *node; qid_t id = from_kqid(&init_user_ns, dquot->dq_id); struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); struct quota_id *entry = NULL; @@ -275,6 +278,7 @@ static int shmem_release_dquot(struct dquot *dquot) goto out_dqlock; down_write(&dqopt->dqio_sem); + node = ((struct rb_root *)info->dqi_priv)->rb_node; while (node) { entry = rb_entry(node, struct quota_id, node); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 712160cd41ec..3c3539c573e7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1444,7 +1444,8 @@ static int uffd_move_lock(struct mm_struct *mm, */ down_read(&(*dst_vmap)->vm_lock->lock); if (*dst_vmap != *src_vmap) - down_read(&(*src_vmap)->vm_lock->lock); + down_read_nested(&(*src_vmap)->vm_lock->lock, + SINGLE_DEPTH_NESTING); } mmap_read_unlock(mm); return err; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 22aa63f4ef63..68fa001648cc 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -989,6 +989,27 @@ unsigned long vmalloc_nr_pages(void) return atomic_long_read(&nr_vmalloc_pages); } +static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root) +{ + struct rb_node *n = root->rb_node; + + addr = (unsigned long)kasan_reset_tag((void *)addr); + + while (n) { + struct vmap_area *va; + + va = rb_entry(n, struct vmap_area, rb_node); + if (addr < va->va_start) + n = n->rb_left; + else if (addr >= va->va_end) + n = n->rb_right; + else + return va; + } + + return NULL; +} + /* Look up the first VA which satisfies addr < va_end, NULL if none. */ static struct vmap_area * __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) @@ -1025,47 +1046,39 @@ __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) static struct vmap_node * find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va) { - struct vmap_node *vn, *va_node = NULL; - struct vmap_area *va_lowest; + unsigned long va_start_lowest; + struct vmap_node *vn; int i; - for (i = 0; i < nr_vmap_nodes; i++) { +repeat: + for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; spin_lock(&vn->busy.lock); - va_lowest = __find_vmap_area_exceed_addr(addr, &vn->busy.root); - if (va_lowest) { - if (!va_node || va_lowest->va_start < (*va)->va_start) { - if (va_node) - spin_unlock(&va_node->busy.lock); + *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root); - *va = va_lowest; - va_node = vn; - continue; - } - } + if (*va) + if (!va_start_lowest || (*va)->va_start < va_start_lowest) + va_start_lowest = (*va)->va_start; spin_unlock(&vn->busy.lock); } - return va_node; -} + /* + * Check if found VA exists, it might have gone away. In this case we + * repeat the search because a VA has been removed concurrently and we + * need to proceed to the next one, which is a rare case. + */ + if (va_start_lowest) { + vn = addr_to_node(va_start_lowest); -static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root) -{ - struct rb_node *n = root->rb_node; + spin_lock(&vn->busy.lock); + *va = __find_vmap_area(va_start_lowest, &vn->busy.root); - addr = (unsigned long)kasan_reset_tag((void *)addr); + if (*va) + return vn; - while (n) { - struct vmap_area *va; - - va = rb_entry(n, struct vmap_area, rb_node); - if (addr < va->va_start) - n = n->rb_left; - else if (addr >= va->va_end) - n = n->rb_right; - else - return va; + spin_unlock(&vn->busy.lock); + goto repeat; } return NULL; @@ -2343,6 +2356,9 @@ struct vmap_area *find_vmap_area(unsigned long addr) struct vmap_area *va; int i, j; + if (unlikely(!vmap_initialized)) + return NULL; + /* * An addr_to_node_id(addr) converts an address to a node index * where a VA is located. If VA spans several zones and passed diff --git a/mm/zswap.c b/mm/zswap.c index 9dec853647c8..caed028945b0 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1080,7 +1080,17 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page) mutex_lock(&acomp_ctx->mutex); src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); - if (acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) { + /* + * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer + * to do crypto_acomp_decompress() which might sleep. In such cases, we must + * resort to copying the buffer to a temporary one. + * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer, + * such as a kmap address of high memory or even ever a vmap address. + * However, sg_init_one is only equipped to handle linearly mapped low memory. + * In such cases, we also must copy the buffer to a temporary and lowmem one. + */ + if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) || + !virt_addr_valid(src)) { memcpy(acomp_ctx->buffer, src, entry->length); src = acomp_ctx->buffer; zpool_unmap_handle(zpool, entry->handle); @@ -1094,7 +1104,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page) BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE); mutex_unlock(&acomp_ctx->mutex); - if (!acomp_ctx->is_sleepable || zpool_can_sleep_mapped(zpool)) + if (src != acomp_ctx->buffer) zpool_unmap_handle(zpool, entry->handle); } @@ -1313,6 +1323,14 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker, if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) return 0; + /* + * The shrinker resumes swap writeback, which will enter block + * and may enter fs. XXX: Harmonize with vmscan.c __GFP_FS + * rules (may_enter_fs()), which apply on a per-folio basis. + */ + if (!gfp_has_io_fs(sc->gfp_mask)) + return 0; + #ifdef CONFIG_MEMCG_KMEM mem_cgroup_flush_stats(memcg); nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; @@ -1618,6 +1636,7 @@ bool zswap_load(struct folio *folio) swp_entry_t swp = folio->swap; pgoff_t offset = swp_offset(swp); struct page *page = &folio->page; + bool swapcache = folio_test_swapcache(folio); struct zswap_tree *tree = swap_zswap_tree(swp); struct zswap_entry *entry; u8 *dst; @@ -1630,7 +1649,20 @@ bool zswap_load(struct folio *folio) spin_unlock(&tree->lock); return false; } - zswap_rb_erase(&tree->rbroot, entry); + /* + * When reading into the swapcache, invalidate our entry. The + * swapcache can be the authoritative owner of the page and + * its mappings, and the pressure that results from having two + * in-memory copies outweighs any benefits of caching the + * compression work. + * + * (Most swapins go through the swapcache. The notable + * exception is the singleton fault on SWP_SYNCHRONOUS_IO + * files, which reads into a private page and may free it if + * the fault fails. We remain the primary owner of the entry.) + */ + if (swapcache) + zswap_rb_erase(&tree->rbroot, entry); spin_unlock(&tree->lock); if (entry->length) @@ -1645,9 +1677,10 @@ bool zswap_load(struct folio *folio) if (entry->objcg) count_objcg_event(entry->objcg, ZSWPIN); - zswap_entry_free(entry); - - folio_mark_dirty(folio); + if (swapcache) { + zswap_entry_free(entry); + folio_mark_dirty(folio); + } return true; } diff --git a/net/9p/client.c b/net/9p/client.c index e265a0ca6bdd..f7e90b4769bb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, received = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); if (non_zc) { int n = copy_to_iter(dataptr, received, to); @@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) int total = 0; *err = 0; - p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", - fid->fid, offset, iov_iter_count(from)); - while (iov_iter_count(from)) { int count = iov_iter_count(from); int rsize = fid->iounit; @@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (count < rsize) rsize = count; + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", + fid->fid, offset, rsize, count); + /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0, @@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) written = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); p9_req_put(clnt, req); iov_iter_revert(from, count - written - iov_iter_count(from)); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 1a3948b8c493..196060dc6138 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -95,7 +95,6 @@ struct p9_poll_wait { * @unsent_req_list: accounting for requests that haven't been sent * @rreq: read request * @wreq: write request - * @req: current request being processed (if any) * @tmp_buf: temporary buffer to read in header * @rc: temporary fcall for reading current frame * @wpos: write position for current frame diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index c5462486dbca..282ec581c072 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev) spin_lock_bh(&ax25_dev_lock); #ifdef CONFIG_AX25_DAMA_SLAVE - ax25_ds_del_timer(ax25_dev); + timer_shutdown_sync(&ax25_dev->dama.slave_timer); #endif /* diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b95c36765d04..2243cec18ecc 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) spin_lock_bh(&bat_priv->tt.commit_lock); - while (true) { + while (timeout) { table_size = batadv_tt_local_table_transmit_size(bat_priv); if (packet_size_max >= table_size) break; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 1690ae57a09d..a7028d38c1f5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2874,7 +2874,7 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) cancel_delayed_work_sync(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); - hci_cmd_sync_cancel_sync(hdev, -err); + hci_cmd_sync_cancel_sync(hdev, err); } /* Suspend HCI device */ @@ -2894,7 +2894,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - hci_cancel_cmd_sync(hdev, -EHOSTDOWN); + hci_cancel_cmd_sync(hdev, EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4210,7 +4210,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) err = hci_send_frame(hdev, skb); if (err < 0) { - hci_cmd_sync_cancel_sync(hdev, err); + hci_cmd_sync_cancel_sync(hdev, -err); return; } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 233453807b50..ce3ff2fa72e5 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val > hdev->conn_info_max_age) - return -EINVAL; - hci_dev_lock(hdev); + if (val == 0 || val > hdev->conn_info_max_age) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->conn_info_min_age = val; hci_dev_unlock(hdev); @@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val < hdev->conn_info_min_age) - return -EINVAL; - hci_dev_lock(hdev); + if (val == 0 || val < hdev->conn_info_min_age) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->conn_info_max_age = val; hci_dev_unlock(hdev); @@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->sniff_min_interval = val; hci_dev_unlock(hdev); @@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->sniff_max_interval = val; hci_dev_unlock(hdev); @@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->le_conn_min_interval = val; hci_dev_unlock(hdev); @@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->le_conn_max_interval = val; hci_dev_unlock(hdev); @@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->le_adv_min_interval = val; hci_dev_unlock(hdev); @@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->le_adv_max_interval = val; hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4ae224824012..a8b8cfebe018 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3208,6 +3208,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (test_bit(HCI_ENCRYPT, &hdev->flags)) set_bit(HCI_CONN_ENCRYPT, &conn->flags); + /* "Link key request" completed ahead of "connect request" completes */ + if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && + ev->link_type == ACL_LINK) { + struct link_key *key; + struct hci_cp_read_enc_key_size cp; + + key = hci_find_link_key(hdev, &ev->bdaddr); + if (key) { + set_bit(HCI_CONN_ENCRYPT, &conn->flags); + + if (!(hdev->commands[20] & 0x10)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } else { + cp.handle = cpu_to_le16(conn->handle); + if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, + sizeof(cp), &cp)) { + bt_dev_err(hdev, "sending read key size failed"); + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } + } + + hci_encrypt_cfm(conn, ev->status); + } + } + /* Get remote features */ if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 00e02138003e..efea25eb56ce 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; - if (skb) + if (skb) { + kfree_skb(hdev->req_skb); hdev->req_skb = skb_get(skb); + } wake_up_interruptible(&hdev->req_wait_q); } } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4ee1b976678b..703b84bd48d5 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1946,10 +1946,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, switch (optname) { case HCI_DATA_DIR: - if (copy_from_sockptr(&opt, optval, sizeof(opt))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len); + if (err) break; - } if (opt) hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR; @@ -1958,10 +1957,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, break; case HCI_TIME_STAMP: - if (copy_from_sockptr(&opt, optval, sizeof(opt))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len); + if (err) break; - } if (opt) hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP; @@ -1979,11 +1977,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, uf.event_mask[1] = *((u32 *) f->event_mask + 1); } - len = min_t(unsigned int, len, sizeof(uf)); - if (copy_from_sockptr(&uf, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len); + if (err) break; - } if (!capable(CAP_NET_RAW)) { uf.type_mask &= hci_sec_filter.type_mask; @@ -2042,10 +2038,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, goto done; } - if (copy_from_sockptr(&opt, optval, sizeof(opt))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len); + if (err) break; - } hci_pi(sk)->mtu = opt; break; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index f6b662369322..c5d8799046cc 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -617,7 +617,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = err; + /* req_result is __u32 so error must be positive to be properly + * propagated. + */ + hdev->req_result = err < 0 ? -err : err; hdev->req_status = HCI_REQ_CANCELED; wake_up_interruptible(&hdev->req_wait_q); @@ -2811,8 +2814,8 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, if (qos->bcast.in.phy & BT_ISO_PHY_CODED) { cp->scanning_phys |= LE_SCAN_PHY_CODED; hci_le_scan_phy_params(phy, type, - interval, - window); + interval * 3, + window * 3); num_phy++; phy++; } @@ -2832,7 +2835,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, if (scan_coded(hdev)) { cp->scanning_phys |= LE_SCAN_PHY_CODED; - hci_le_scan_phy_params(phy, type, interval, window); + hci_le_scan_phy_params(phy, type, interval * 3, window * 3); num_phy++; phy++; } @@ -3416,7 +3419,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; - bacpy(&hdev->public_addr, &ba); + if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) + baswap(&hdev->public_addr, &ba); + else + bacpy(&hdev->public_addr, &ba); } struct hci_init_stage { diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index c8793e57f4b5..ef0cc80b4c0c 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1451,8 +1451,8 @@ static bool check_ucast_qos(struct bt_iso_qos *qos) static bool check_bcast_qos(struct bt_iso_qos *qos) { - if (qos->bcast.sync_factor == 0x00) - return false; + if (!qos->bcast.sync_factor) + qos->bcast.sync_factor = 0x01; if (qos->bcast.packing > 0x01) return false; @@ -1475,6 +1475,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos) if (qos->bcast.skip > 0x01f3) return false; + if (!qos->bcast.sync_timeout) + qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT; + if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000) return false; @@ -1484,6 +1487,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos) if (qos->bcast.mse > 0x1f) return false; + if (!qos->bcast.timeout) + qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT; + if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000) return false; @@ -1494,7 +1500,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; - int len, err = 0; + int err = 0; struct bt_iso_qos qos = default_qos; u32 opt; @@ -1509,10 +1515,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -1521,10 +1526,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_PKT_STATUS: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags); @@ -1539,17 +1543,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname, break; } - len = min_t(unsigned int, sizeof(qos), optlen); - - if (copy_from_sockptr(&qos, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&qos, sizeof(qos), optval, optlen); + if (err) break; - } - - if (len == sizeof(qos.ucast) && !check_ucast_qos(&qos)) { - err = -EINVAL; - break; - } iso_pi(sk)->qos = qos; iso_pi(sk)->qos_user_set = true; @@ -1564,18 +1560,16 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname, } if (optlen > sizeof(iso_pi(sk)->base)) { - err = -EOVERFLOW; + err = -EINVAL; break; } - len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen); - - if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(iso_pi(sk)->base, optlen, optval, + optlen); + if (err) break; - } - iso_pi(sk)->base_len = len; + iso_pi(sk)->base_len = optlen; break; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 467b242d8be0..dc0897408793 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4054,8 +4054,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn, return -EPROTO; hci_dev_lock(hdev); - if (hci_dev_test_flag(hdev, HCI_MGMT) && - !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_device_connected(hdev, hcon, NULL, 0); hci_dev_unlock(hdev); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 4287aa6cc988..e7d810b23082 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -727,7 +727,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; - int len, err = 0; + int err = 0; u32 opt; BT_DBG("sk %p", sk); @@ -754,11 +754,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, opts.max_tx = chan->max_tx; opts.txwin_size = chan->tx_win; - len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_sockptr(&opts, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen); + if (err) break; - } if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) { err = -EINVAL; @@ -801,10 +799,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; case L2CAP_LM: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt & L2CAP_LM_FIPS) { err = -EINVAL; @@ -885,7 +882,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, struct bt_security sec; struct bt_power pwr; struct l2cap_conn *conn; - int len, err = 0; + int err = 0; u32 opt; u16 mtu; u8 mode; @@ -911,11 +908,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, sec.level = BT_SECURITY_LOW; - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_sockptr(&sec, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen); + if (err) break; - } if (sec.level < BT_SECURITY_LOW || sec.level > BT_SECURITY_FIPS) { @@ -960,10 +955,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) { set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -975,10 +969,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_FLUSHABLE: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_FLUSHABLE_ON) { err = -EINVAL; @@ -1010,11 +1003,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, pwr.force_active = BT_POWER_FORCE_ACTIVE_ON; - len = min_t(unsigned int, sizeof(pwr), optlen); - if (copy_from_sockptr(&pwr, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen); + if (err) break; - } if (pwr.force_active) set_bit(FLAG_FORCE_ACTIVE, &chan->flags); @@ -1023,10 +1014,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_CHANNEL_POLICY: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } err = -EOPNOTSUPP; break; @@ -1055,10 +1045,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mtu, optval, sizeof(u16))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen); + if (err) break; - } if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && sk->sk_state == BT_CONNECTED) @@ -1086,10 +1075,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mode, optval, sizeof(u8))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen); + if (err) break; - } BT_DBG("mode %u", mode); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index b54e8a530f55..29aa07e9db9d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, switch (optname) { case RFCOMM_LM: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { + if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) { err = -EFAULT; break; } @@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct bt_security sec; int err = 0; - size_t len; u32 opt; BT_DBG("sk %p", sk); @@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, sec.level = BT_SECURITY_LOW; - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_sockptr(&sec, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen); + if (err) break; - } if (sec.level > BT_SECURITY_HIGH) { err = -EINVAL; @@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 43daf965a01e..368e026f4d15 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -824,7 +824,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; - int len, err = 0; + int err = 0; struct bt_voice voice; u32 opt; struct bt_codecs *codecs; @@ -843,10 +843,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -863,11 +862,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, voice.setting = sco_pi(sk)->setting; - len = min_t(unsigned int, sizeof(voice), optlen); - if (copy_from_sockptr(&voice, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&voice, sizeof(voice), optval, + optlen); + if (err) break; - } /* Explicitly check for these values */ if (voice.setting != BT_VOICE_TRANSPARENT && @@ -890,10 +888,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_PKT_STATUS: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags); @@ -934,9 +931,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(buffer, optval, optlen)) { + err = bt_copy_from_sockptr(buffer, optlen, optval, optlen); + if (err) { hci_dev_put(hdev); - err = -EFAULT; break; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f21097e73482..ceaa5a89b947 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) return netif_receive_skb(skb); } -static int br_pass_frame_up(struct sk_buff *skb) +static int br_pass_frame_up(struct sk_buff *skb, bool promisc) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); @@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb) br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), BR_MCAST_DIR_TX); + BR_INPUT_SKB_CB(skb)->promisc = promisc; + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(indev), NULL, skb, indev, NULL, br_netif_receive_skb); @@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb struct net_bridge_mcast *brmctx; struct net_bridge_vlan *vlan; struct net_bridge *br; + bool promisc; u16 vid = 0; u8 state; @@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (p->flags & BR_LEARNING) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); - local_rcv = !!(br->dev->flags & IFF_PROMISC); + promisc = !!(br->dev->flags & IFF_PROMISC); + local_rcv = promisc; + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { /* by definition the broadcast is also a multicast address */ if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { @@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb unsigned long now = jiffies; if (test_bit(BR_FDB_LOCAL, &dst->flags)) - return br_pass_frame_up(skb); + return br_pass_frame_up(skb, false); if (now != dst->used) dst->used = now; @@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } if (local_rcv) - return br_pass_frame_up(skb); + return br_pass_frame_up(skb, promisc); out: return 0; @@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } + BR_INPUT_SKB_CB(skb)->promisc = false; + /* The else clause should be hit when nf_hook(): * - returns < 0 (drop/error) * - returns = 0 (stolen/nf_queue) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 35e10c5a766d..22e35623c148 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + bool promisc = BR_INPUT_SKB_CB(skb)->promisc; struct nf_conntrack *nfct = skb_nfct(skb); const struct nf_ct_hook *ct_hook; struct nf_conn *ct; int ret; + if (promisc) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 86ea5e6689b5..d4bedc87b1d8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -589,6 +589,7 @@ struct br_input_skb_cb { #endif u8 proxyarp_replied:1; u8 src_port_isolated:1; + u8 promisc:1; #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_filtered:1; #endif diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 99d82676f780..cbd0e3586c3f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) struct ebt_table_info *newinfo; struct ebt_replace tmp; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; @@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg, { struct compat_ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 6f877e31709b..c3c51b9a6826 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - enum ip_conntrack_info ctinfo; + bool promisc = BR_INPUT_SKB_CB(skb)->promisc; + struct nf_conntrack *nfct = skb_nfct(skb); struct nf_conn *ct; - if (skb->pkt_type == PACKET_HOST) + if (promisc) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + + if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; /* nf_conntrack_confirm() cannot handle concurrent clones, * this happens for broad/multicast frames with e.g. macvlan on top * of the bridge device. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + ct = container_of(nfct, struct nf_conn, ct_general); + if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) return NF_ACCEPT; /* let inet prerouting call conntrack again */ diff --git a/net/core/dev.c b/net/core/dev.c index 9a67003e49db..331848eca7d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -429,7 +429,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data); * PP consumers must pay attention to run APIs in the appropriate context * (e.g. NAPI context). */ -static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool); +static DEFINE_PER_CPU(struct page_pool *, system_page_pool); #ifdef CONFIG_LOCKDEP /* @@ -3775,6 +3775,10 @@ no_lock_out: return rc; } + if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) { + kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP); + return NET_XMIT_DROP; + } /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. @@ -3814,7 +3818,9 @@ no_lock_out: qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { + WRITE_ONCE(q->owner, smp_processor_id()); rc = dev_qdisc_enqueue(skb, q, &to_free, txq); + WRITE_ONCE(q->owner, -1); if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); diff --git a/net/core/gro.c b/net/core/gro.c index ee30d4f0c038..83f35d99a682 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -192,8 +192,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) } merge: - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; diff --git a/net/core/sock.c b/net/core/sock.c index 43bf3818c19e..0963689a5950 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -482,7 +482,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { + if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; @@ -552,7 +552,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, skb->dev = NULL; - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { + if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 27d733c0f65e..8598466a3805 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock *sk; int err = 0; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) @@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) struct bpf_shtab_elem *elem; int ret = -ENOENT; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c98b5b71ad7c..e9d45133d641 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -132,30 +132,29 @@ static int hsr_dev_open(struct net_device *dev) { struct hsr_priv *hsr; struct hsr_port *port; - char designation; + const char *designation = NULL; hsr = netdev_priv(dev); - designation = '\0'; hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: - designation = 'A'; + designation = "Slave A"; break; case HSR_PT_SLAVE_B: - designation = 'B'; + designation = "Slave B"; break; default: - designation = '?'; + designation = "Unknown"; } if (!is_slave_up(port->dev)) - netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n", + netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n", designation, port->dev->name); } - if (designation == '\0') + if (!designation) netdev_warn(dev, "No slave devices configured\n"); return 0; diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index e5742f2a2d52..1b6457f357bd 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port) netdev_update_features(master->dev); dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); netdev_rx_handler_unregister(port->dev); - dev_set_promiscuity(port->dev, -1); + if (!port->hsr->fwd_offloaded) + dev_set_promiscuity(port->dev, -1); netdev_upper_dev_unlink(port->dev, master->dev); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 48741352a88a..c484b1c0fc00 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1050,6 +1050,11 @@ next: e++; } } + + /* Don't let NLM_DONE coalesce into a message, even if it could. + * Some user space expects NLM_DONE in a separate recv(). + */ + err = skb->len; out: cb->args[1] = e; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7d8090f109ef..3b38610958ee 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -203,8 +203,15 @@ static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, kuid_t sk_uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { - if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) - return false; + if (ipv6_only_sock(sk2)) { + if (sk->sk_family == AF_INET) + return false; + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return false; +#endif + } return inet_bind_conflict(sk, sk2, sk_uid, relax, reuseport_cb_ok, reuseport_ok); @@ -287,6 +294,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l struct sock_reuseport *reuseport_cb; struct inet_bind_hashbucket *head2; struct inet_bind2_bucket *tb2; + bool conflict = false; bool reuseport_cb_ok; rcu_read_lock(); @@ -299,18 +307,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l spin_lock(&head2->lock); - inet_bind_bucket_for_each(tb2, &head2->chain) - if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) - break; + inet_bind_bucket_for_each(tb2, &head2->chain) { + if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) + continue; - if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, - reuseport_ok)) { - spin_unlock(&head2->lock); - return true; + if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok)) + continue; + + conflict = true; + break; } spin_unlock(&head2->lock); - return false; + + return conflict; } /* @@ -771,6 +781,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) } EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +void inet_csk_clear_xmit_timers_sync(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* ongoing timer handlers need to acquire socket lock. */ + sock_not_owned_by_me(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; + + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); + sk_stop_timer_sync(sk, &sk->sk_timer); +} + void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7072fc0783ef..c88c9034d630 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include #include +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. + */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. + */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a4941f53b523..fb947d1613fe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); - skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7b16c211b904..57ddcd8c62f6 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, tpi->flags | TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); } else { + if (unlikely(!pskb_may_pull(skb, + gre_hdr_len + sizeof(*ershdr)))) + return PACKET_REJECT; + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 8f6e950163a7..1b991b889506 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -329,6 +329,7 @@ config NFT_COMPAT_ARP config IP_NF_ARPFILTER tristate "arptables-legacy packet filtering support" select IP_NF_ARPTABLES + select NETFILTER_FAMILY_ARP depends on NETFILTER_XTABLES help ARP packet filtering defines a table `filter', which has a series of diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2407066b0fec..14365b20f1c5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -964,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1254,6 +1258,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1262,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7da1df4997d0..fe89a056eb06 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1116,6 +1118,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1492,6 +1496,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1500,6 +1506,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 74928a9d1aa4..535856b0f0ed 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -768,8 +768,10 @@ static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used) struct net *net = nh->net; int err; - if (nexthop_notifiers_is_empty(net)) + if (nexthop_notifiers_is_empty(net)) { + *hw_stats_used = false; return 0; + } err = nh_notifier_grp_hw_stats_init(&info, nh); if (err) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c8f76f56dc16..d36ace160d42 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->n_redirects; -#ifdef CONFIG_IP_ROUTE_VERBOSE - if (log_martians && + if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); -#endif } out_put_peer: inet_putpeer(peer); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d20b62d52171..e767721b3a58 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout) lock_sock(sk); __tcp_close(sk, timeout); release_sock(sk); + if (!sk->sk_net_refcnt) + inet_csk_clear_xmit_timers_sync(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 661d0e0d273f..c02bf011d4a6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -582,6 +582,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +EXPORT_SYMBOL(udp_encap_needed_key); + +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +EXPORT_SYMBOL(udpv6_encap_needed_key); +#endif + void udp_encap_enable(void) { static_branch_inc(&udp_encap_needed_key); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b9880743765c..3498dd1d0694 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) NAPI_GRO_CB(p)->count++; p->data_len += skb->len; - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; p->truesize += skb->truesize; p->len += skb->len; @@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - /* we can do L4 aggregation only if the packet can't land in a tunnel - * otherwise we could corrupt the inner stream + /* We can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream. Detecting such packets + * cannot be foolproof and the aggregation might still happen in some + * cases. Such packets should be caught in udp_unexpected_gso later. */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { + /* If the packet was locally encapsulated in a UDP tunnel that + * wasn't detected above, do not GRO. + */ + if (skb->encapsulation) + goto out; + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1; @@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 247bd4d8ee45..779aa6ecdd49 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2091,9 +2091,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { - result = ifp; - in6_ifa_hold(ifp); - break; + if (in6_ifa_hold_safe(ifp)) { + result = ifp; + break; + } } } } @@ -5416,10 +5417,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, err = 0; if (fillargs.ifindex) { - err = -ENODEV; dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); - if (!dev) + if (!dev) { + err = -ENODEV; goto done; + } idev = __in6_dev_get(dev); if (idev) err = in6_dump_addrs(idev, skb, cb, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5c558dc1c683..c1f62352a481 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -651,19 +651,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (!w) { /* New dump: * - * 1. hook callback destructor. - */ - cb->args[3] = (long)cb->done; - cb->done = fib6_dump_done; - - /* - * 2. allocate and initialize walker. + * 1. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) return -ENOMEM; w->func = fib6_dump_node; cb->args[2] = (long)w; + + /* 2. hook callback destructor. + */ + cb->args[3] = (long)cb->done; + cb->done = fib6_dump_done; + } arg.skb = skb; @@ -1385,7 +1385,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->fib6_table; - struct fib6_node *fn, *pn = NULL; + struct fib6_node *fn; +#ifdef CONFIG_IPV6_SUBTREES + struct fib6_node *pn = NULL; +#endif int err = -ENOMEM; int allow_create = 1; int replace_required = 0; @@ -1409,9 +1412,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, goto out; } +#ifdef CONFIG_IPV6_SUBTREES pn = fn; -#ifdef CONFIG_IPV6_SUBTREES if (rt->fib6_src.plen) { struct fib6_node *sn; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ca7e77e84283..c89aef524df9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, struct ip6_tnl *tunnel; u8 ver; + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + ipv6h = ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; ver = ershdr->ver; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index fd9f049d6d41..131f7bb2110d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1133,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1501,6 +1505,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1509,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1a51a44571c3..d0dcbaca1994 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7c1e6469d091..8b1dd7f51249 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -447,7 +447,7 @@ csum_copy_err: goto try_again; } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 312bcaeea96f..bbd347de00b4 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f03452dc716d..f67c1d021812 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2199,15 +2199,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) { - ieee80211_clear_fast_rx(sta); + sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); - } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; + ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 49da401c5340..35a8ba25fa57 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -158,7 +158,7 @@ do { \ _sdata_dbg(print, sdata, "[link %d] " fmt, \ link_id, ##__VA_ARGS__); \ else \ - _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__); \ + _sdata_dbg(print, sdata, fmt, ##__VA_ARGS__); \ } while (0) #define link_dbg(link, fmt, ...) \ _link_id_dbg(1, (link)->sdata, (link)->link_id, \ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b6fead612b66..bd507d6b65e3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -131,7 +131,7 @@ struct ieee80211_bss { }; /** - * enum ieee80211_corrupt_data_flags - BSS data corruption flags + * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags * @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted * @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted * @@ -144,7 +144,7 @@ enum ieee80211_bss_corrupt_data_flags { }; /** - * enum ieee80211_valid_data_flags - BSS valid data flags + * enum ieee80211_bss_valid_data_flags - BSS valid data flags * @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE * @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 47a2cba8313f..96b70006b7fc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5874,6 +5874,15 @@ static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.active_links != active_links) { + /* usable links are affected when active_links are changed, + * so notify the driver about the status change + */ + changed |= BSS_CHANGED_MLD_VALID_LINKS; + active_links &= sdata->vif.active_links; + if (!active_links) + active_links = + BIT(__ffs(sdata->vif.valid_links & + ~dormant_links)); ret = ieee80211_set_active_links(&sdata->vif, active_links); if (ret) { sdata_info(sdata, "Failed to set TTLM active links\n"); @@ -5888,7 +5897,6 @@ static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, goto out; } - changed |= BSS_CHANGED_MLD_VALID_LINKS; sdata->vif.suspended_links = suspended_links; if (sdata->vif.suspended_links) changed |= BSS_CHANGED_MLD_TTLM; @@ -7652,7 +7660,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "failed to insert STA entry for the AP (error %d)\n", err); - goto out_err; + goto out_release_chan; } } else WARN_ON_ONCE(!ether_addr_equal(link->u.mgd.bssid, cbss->bssid)); @@ -7663,8 +7671,9 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return 0; +out_release_chan: + ieee80211_link_release_channel(link); out_err: - ieee80211_link_release_channel(&sdata->deflink); ieee80211_vif_set_links(sdata, 0, 0); return err; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3a1967bc7bad..7e74b812e366 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3937,8 +3937,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_set_state(newsk, TCP_CLOSE); } } else { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); tcpfallback: newsk->sk_kern_sock = kern; lock_sock(newsk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index dcd1c76d2a3b..73fdf423de44 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1493,6 +1493,10 @@ int mptcp_set_rcvlowat(struct sock *sk, int val) struct mptcp_subflow_context *subflow; int space, cap; + /* bpf can land here with a wrong sk type */ + if (sk->sk_protocol == IPPROTO_TCP) + return -EINVAL; + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1626dd20c68f..6042a47da61b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -905,6 +905,8 @@ dispose_child: return child; fallback: + if (fallback) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); mptcp_subflow_drop_ctx(child); return child; } diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 9505f9d188ff..6eef15648b7b 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, proto = veth->h_vlan_encapsulated_proto; break; case htons(ETH_P_PPP_SES): - proto = nf_flow_pppoe_proto(skb); + if (!nf_flow_pppoe_proto(skb, &proto)) + return NF_ACCEPT; break; default: proto = skb->protocol; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index e45fade76409..5383bed3d3e0 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb, tuple->encap[i].proto = skb->protocol; break; case htons(ETH_P_PPP_SES): - phdr = (struct pppoe_hdr *)skb_mac_header(skb); + phdr = (struct pppoe_hdr *)skb_network_header(skb); tuple->encap[i].id = ntohs(phdr->sid); tuple->encap[i].proto = skb->protocol; break; @@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; } -static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, +static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, u32 *offset) { struct vlan_ethhdr *veth; + __be16 inner_proto; switch (skb->protocol) { case htons(ETH_P_8021Q): @@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, } break; case htons(ETH_P_PPP_SES): - if (nf_flow_pppoe_proto(skb) == proto) { + if (nf_flow_pppoe_proto(skb, &inner_proto) && + inner_proto == proto) { *offset += PPPOE_SES_HLEN; return true; } @@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb, skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): - skb->protocol = nf_flow_pppoe_proto(skb); + skb->protocol = __nf_flow_pppoe_proto(skb); skb_pull(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5fa3d3540c93..167074283ea9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -594,6 +594,12 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + + if (!nft_set_elem_active(ext, iter->genmask)) + return 0; + + nft_set_elem_change_active(ctx->net, set, ext); nft_setelem_data_deactivate(ctx->net, set, elem_priv); return 0; @@ -617,6 +623,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; + nft_set_elem_change_active(ctx->net, set, ext); nft_setelem_data_deactivate(ctx->net, set, catchall->elem); break; } @@ -626,6 +633,7 @@ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) { struct nft_set_iter iter = { .genmask = nft_genmask_next(ctx->net), + .type = NFT_ITER_UPDATE, .fn = nft_mapelem_deactivate, }; @@ -1200,6 +1208,26 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) __NFT_TABLE_F_WAS_AWAKEN | \ __NFT_TABLE_F_WAS_ORPHAN) +static bool nft_table_pending_update(const struct nft_ctx *ctx) +{ + struct nftables_pernet *nft_net = nft_pernet(ctx->net); + struct nft_trans *trans; + + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return true; + + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->ctx.table == ctx->table && + ((trans->msg_type == NFT_MSG_NEWCHAIN && + nft_trans_chain_update(trans)) || + (trans->msg_type == NFT_MSG_DELCHAIN && + nft_is_base_chain(trans->ctx.chain)))) + return true; + } + + return false; +} + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -1226,7 +1254,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return -EOPNOTSUPP; /* No dormant off/on/off/on games in single transaction */ - if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + if (nft_table_pending_update(ctx)) return -EINVAL; trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, @@ -2430,6 +2458,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook = {}; + if (table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; @@ -2631,6 +2662,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } } + if (table->flags & __NFT_TABLE_F_UPDATE && + !list_empty(&hook.list)) { + NL_SET_BAD_ATTR(extack, attr); + err = -EOPNOTSUPP; + goto err_hooks; + } + if (!(table->flags & NFT_TABLE_F_DORMANT) && nft_is_base_chain(chain) && !list_empty(&hook.list)) { @@ -2860,6 +2898,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_trans *trans; int err; + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return -EOPNOTSUPP; + err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, ctx->family, chain->flags, extack); if (err < 0) @@ -2944,7 +2985,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (nla[NFTA_CHAIN_HOOK]) { - if (chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN || + chain->flags & NFT_CHAIN_HW_OFFLOAD) return -EOPNOTSUPP; if (nft_is_base_chain(chain)) { @@ -3026,7 +3068,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, { const struct nft_expr_type *type, *candidate = NULL; - list_for_each_entry(type, &nf_tables_expressions, list) { + list_for_each_entry_rcu(type, &nf_tables_expressions, list) { if (!nla_strcmp(nla, type->name)) { if (!type->family && !candidate) candidate = type; @@ -3058,9 +3100,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); + rcu_read_lock(); type = __nft_expr_type_get(family, nla); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -3841,6 +3887,9 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_data *data; int err; + if (!nft_set_elem_active(ext, iter->genmask)) + return 0; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) return 0; @@ -3864,17 +3913,20 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) { - u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_iter dummy_iter = { + .genmask = nft_genmask_next(ctx->net), + }; struct nft_set_elem_catchall *catchall; + struct nft_set_ext *ext; int ret = 0; list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_active(ext, genmask)) + if (!nft_set_elem_active(ext, dummy_iter.genmask)) continue; - ret = nft_setelem_validate(ctx, set, NULL, catchall->elem); + ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem); if (ret < 0) return ret; } @@ -5363,6 +5415,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + + if (!nft_set_elem_active(ext, iter->genmask)) + return 0; + return nft_setelem_data_validate(ctx, set, elem_priv); } @@ -5407,6 +5464,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, } iter.genmask = nft_genmask_next(ctx->net); + iter.type = NFT_ITER_UPDATE; iter.skip = 0; iter.count = 0; iter.err = 0; @@ -5454,6 +5512,13 @@ static int nft_mapelem_activate(const struct nft_ctx *ctx, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + + /* called from abort path, reverse check to undo changes. */ + if (nft_set_elem_active(ext, iter->genmask)) + return 0; + + nft_clear(ctx->net, ext); nft_setelem_data_activate(ctx->net, set, elem_priv); return 0; @@ -5471,6 +5536,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; + nft_clear(ctx->net, ext); nft_setelem_data_activate(ctx->net, set, catchall->elem); break; } @@ -5480,6 +5546,7 @@ static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) { struct nft_set_iter iter = { .genmask = nft_genmask_next(ctx->net), + .type = NFT_ITER_UPDATE, .fn = nft_mapelem_activate, }; @@ -5744,6 +5811,9 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_set_dump_args *args; + if (!nft_set_elem_active(ext, iter->genmask)) + return 0; + if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) return 0; @@ -5854,6 +5924,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.skb = skb; args.reset = dump_ctx->reset; args.iter.genmask = nft_genmask_cur(net); + args.iter.type = NFT_ITER_READ; args.iter.skip = cb->args[0]; args.iter.count = 0; args.iter.err = 0; @@ -6593,7 +6664,7 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set, struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_setelem_is_catchall(set, elem_priv)) { - nft_set_elem_change_active(net, set, ext); + nft_clear(net, ext); } else { set->ops->activate(net, set, elem_priv); } @@ -7152,6 +7223,16 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) } } +static int nft_setelem_active_next(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + u8 genmask = nft_genmask_next(net); + + return nft_set_elem_active(ext, genmask); +} + static void nft_setelem_data_activate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) @@ -7275,8 +7356,12 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_trans *trans; + if (!nft_set_elem_active(ext, iter->genmask)) + return 0; + trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, sizeof(struct nft_trans_elem), GFP_ATOMIC); if (!trans) @@ -7338,6 +7423,7 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask) { struct nft_set_iter iter = { .genmask = genmask, + .type = NFT_ITER_UPDATE, .fn = nft_setelem_flush, }; @@ -7573,7 +7659,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; - list_for_each_entry(type, &nf_tables_objects, list) { + list_for_each_entry_rcu(type, &nf_tables_objects, list) { if (type->family != NFPROTO_UNSPEC && type->family != family) continue; @@ -7589,9 +7675,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; + rcu_read_lock(); type = __nft_obj_type_get(objtype, family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -8263,11 +8353,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, return err; } +/* call under rcu_read_lock */ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) { const struct nf_flowtable_type *type; - list_for_each_entry(type, &nf_tables_flowtables, list) { + list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { if (family == type->family) return type; } @@ -8279,9 +8370,13 @@ nft_flowtable_type_get(struct net *net, u8 family) { const struct nf_flowtable_type *type; + rcu_read_lock(); type = __nft_flowtable_type_get(family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -10182,9 +10277,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_chain_update(trans)) { nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + nft_netdev_unregister_hooks(net, + &nft_trans_chain_hooks(trans), + true); + } } else { nft_chain_del(trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, @@ -10423,10 +10520,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { @@ -10460,9 +10558,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + nft_netdev_unregister_hooks(net, + &nft_trans_chain_hooks(trans), + true); + } free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); @@ -10554,8 +10654,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DESTROYSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_setelem_data_activate(net, te->set, te->elem_priv); - nft_setelem_activate(net, te->set, te->elem_priv); + if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { + nft_setelem_data_activate(net, te->set, te->elem_priv); + nft_setelem_activate(net, te->set, te->elem_priv); + } if (!nft_setelem_is_catchall(te->set, te->elem_priv)) te->set->ndeact--; @@ -10616,12 +10718,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - - return 0; + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, @@ -10634,6 +10731,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, gc_seq = nft_gc_seq_begin(nft_net); ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); + + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. + */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + mutex_unlock(&nft_net->commit_mutex); return ret; @@ -10737,6 +10845,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); + if (!nft_set_elem_active(ext, iter->genmask)) + return 0; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) return 0; @@ -10821,6 +10932,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, continue; iter.genmask = nft_genmask_next(ctx->net); + iter.type = NFT_ITER_UPDATE; iter.skip = 0; iter.count = 0; iter.err = 0; @@ -11439,9 +11551,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nft_net->commit_list) || - !list_empty(&nft_net->module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); __nft_release_tables(net); @@ -11533,6 +11646,7 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); + nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index a0055f510e31..b314ca728a29 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -216,6 +216,7 @@ static int nft_lookup_validate(const struct nft_ctx *ctx, return 0; iter.genmask = nft_genmask_next(ctx->net); + iter.type = NFT_ITER_UPDATE; iter.skip = 0; iter.count = 0; iter.err = 0; diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 32df7a16835d..1caa04619dc6 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -172,7 +172,7 @@ static void nft_bitmap_activate(const struct net *net, nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 11 state. */ priv->bitmap[idx] |= (genmask << off); - nft_set_elem_change_active(net, set, &be->ext); + nft_clear(net, &be->ext); } static void nft_bitmap_flush(const struct net *net, @@ -222,8 +222,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, list_for_each_entry_rcu(be, &priv->list, head) { if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&be->ext, iter->genmask)) - goto cont; iter->err = iter->fn(ctx, set, iter, &be->priv); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 6968a3b34236..daa56dda737a 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -199,7 +199,7 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set, { struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); - nft_set_elem_change_active(net, set, &he->ext); + nft_clear(net, &he->ext); } static void nft_rhash_flush(const struct net *net, @@ -286,8 +286,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&he->ext, iter->genmask)) - goto cont; iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err < 0) @@ -599,7 +597,7 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, { struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); - nft_set_elem_change_active(net, set, &he->ext); + nft_clear(net, &he->ext); } static void nft_hash_flush(const struct net *net, @@ -652,8 +650,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, hlist_for_each_entry_rcu(he, &priv->table[i], node) { if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&he->ext, iter->genmask)) - goto cont; iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err < 0) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index df8de5090246..187138afac45 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1847,7 +1847,7 @@ static void nft_pipapo_activate(const struct net *net, { struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); - nft_set_elem_change_active(net, set, &e->ext); + nft_clear(net, &e->ext); } /** @@ -2077,6 +2077,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, rules_fx = rules_f0; nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1; + if (!pipapo_match_field(f, start, rules_fx, match_start, match_end)) break; @@ -2089,16 +2091,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); - } - if (i == m->field_count) { - priv->dirty = true; - pipapo_drop(m, rulemap); - return; + if (last && f->mt[rulemap[i].to].e == e) { + priv->dirty = true; + pipapo_drop(m, rulemap); + return; + } } first_rule += rules_f0; } + + WARN_ON_ONCE(1); /* elem_priv not found */ } /** @@ -2115,13 +2119,15 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_pipapo *priv = nft_set_priv(set); - struct net *net = read_pnet(&set->net); const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; unsigned int i, r; + WARN_ON_ONCE(iter->type != NFT_ITER_READ && + iter->type != NFT_ITER_UPDATE); + rcu_read_lock(); - if (iter->genmask == nft_genmask_cur(net)) + if (iter->type == NFT_ITER_READ) m = rcu_dereference(priv->match); else m = priv->clone; @@ -2143,9 +2149,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, e = f->mt[r].e; - if (!nft_set_elem_active(&e->ext, iter->genmask)) - goto cont; - iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) goto out; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 9944fe479e53..b7ea21327549 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -532,7 +532,7 @@ static void nft_rbtree_activate(const struct net *net, { struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); - nft_set_elem_change_active(net, set, &rbe->ext); + nft_clear(net, &rbe->ext); } static void nft_rbtree_flush(const struct net *net, @@ -600,8 +600,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&rbe->ext, iter->genmask)) - goto cont; iter->err = iter->fn(ctx, set, iter, &rbe->priv); if (iter->err < 0) { diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 819157bbb5a2..d5344563e525 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = copy_safe_from_sockptr(&opt, sizeof(opt), + optval, optlen); + if (err) break; - } if (opt > LLCP_MAX_RW) { err = -EINVAL; @@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = copy_safe_from_sockptr(&opt, sizeof(opt), + optval, optlen); + if (err) break; - } if (opt > LLCP_MAX_MIUX) { err = -EINVAL; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index cdad47b140fa..0d26c8ec9993 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work) nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + if (!nci_plen(skb->data)) { + kfree_skb(skb); + break; + } + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3019a4406ca4..74b63cdb5992 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1380,8 +1380,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) - pr_info_ratelimited("Failed to associated timeout " - "policy `%s'\n", ct_info.timeout); + OVS_NLERR(log, + "Failed to associated timeout policy '%s'", + ct_info.timeout); else ct_info.nf_ct_timeout = rcu_dereference( nf_ct_timeout_find(ct_info.ct)->timeout); diff --git a/net/rds/rdma.c b/net/rds/rdma.c index a4e3c5de998b..00dbcd4d28e6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, } ret = PTR_ERR(trans_private); /* Trigger connection so that its ready for the next retry */ - if (ret == -ENODEV) + if (ret == -ENODEV && cp) rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 39945b139c48..cd0accaf844a 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_skbmod *d = to_skbmod(a); unsigned char *b = skb_tail_pointer(skb); struct tcf_skbmod_params *p; - struct tc_skbmod opt = { - .index = d->tcf_index, - .refcnt = refcount_read(&d->tcf_refcnt) - ref, - .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, - }; + struct tc_skbmod opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + opt.index = d->tcf_index; + opt.refcnt = refcount_read(&d->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; p = rcu_dereference_protected(d->skbmod_p, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 65e05b0c98e4..60239378d43f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) notify = !sch->q.qlen && !WARN_ON_ONCE(!n && !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ - sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); + sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON_ONCE(parentid != TC_H_ROOT); break; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ff5336493777..4a2c763e2d11 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -974,6 +974,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; + sch->owner = -1; netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL); refcount_set(&sch->refcnt, 1); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index b2c1b683a88e..d2b02710ab07 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -921,8 +921,6 @@ out_err: * Caller provides the truncation length of the output token (h) in * cksumout.len. * - * Note that for RPCSEC, the "initial cipher state" is always all zeroes. - * * Return values: * %GSS_S_COMPLETE: Digest computed, @cksumout filled in * %GSS_S_FAILURE: Call failed @@ -933,19 +931,22 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher, int body_offset, struct xdr_netobj *cksumout) { unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher); - static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; struct ahash_request *req; struct scatterlist sg[1]; + u8 *iv, *checksumdata; int err = -ENOMEM; - u8 *checksumdata; checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL); if (!checksumdata) return GSS_S_FAILURE; + /* For RPCSEC, the "initial cipher state" is always all zeroes. */ + iv = kzalloc(ivsize, GFP_KERNEL); + if (!iv) + goto out_free_mem; req = ahash_request_alloc(tfm, GFP_KERNEL); if (!req) - goto out_free_cksumdata; + goto out_free_mem; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); err = crypto_ahash_init(req); if (err) @@ -969,7 +970,8 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher, out_free_ahash: ahash_request_free(req); -out_free_cksumdata: +out_free_mem: + kfree(iv); kfree_sensitive(checksumdata); return err ? GSS_S_FAILURE : GSS_S_COMPLETE; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 545017a3daa4..6b3f01beb294 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1206,15 +1206,6 @@ err_noclose: * MSG_SPLICE_PAGES is used exclusively to reduce the number of * copy operations in this path. Therefore the caller must ensure * that the pages backing @xdr are unchanging. - * - * Note that the send is non-blocking. The caller has incremented - * the reference count on each page backing the RPC message, and - * the network layer will "put" these pages when transmission is - * complete. - * - * This is safe for our RPC services because the memory backing - * the head and tail components is never kmalloc'd. These always - * come from pages in the svc_rqst::rq_pages array. */ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, rpc_fraghdr marker, unsigned int *sentp) @@ -1244,6 +1235,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, 1 + count, sizeof(marker) + rqstp->rq_res.len); ret = sock_sendmsg(svsk->sk_sock, &msg); + page_frag_free(buf); if (ret < 0) return ret; *sentp += ret; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 211f57164cb6..b783231668c6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); - psock = sk_psock_get(sk); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; + psock = sk_psock_get(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ @@ -2152,12 +2152,15 @@ recv_end: } /* Drain records from the rx_list & copy if required */ - if (is_peek || is_kvec) + if (is_peek) err = process_rx_list(ctx, msg, &control, copied + peeked, decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); + + /* we could have copied less than we wanted, and possibly nothing */ + decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5b41e2321209..9a6ad5974dff 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2663,9 +2663,13 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); } - } else if (!(flags & MSG_PEEK)) { + } else if (flags & MSG_PEEK) { + skb = NULL; + } else { skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + WRITE_ONCE(u->oob_skb, NULL); + if (!WARN_ON_ONCE(skb_unref(skb))) + kfree_skb(skb); skb = skb_peek(&sk->sk_receive_queue); } } @@ -2739,18 +2743,16 @@ redo: last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; +again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb) { skb = manage_oob(skb, sk, flags, copied); - if (!skb) { + if (!skb && copied) { unix_state_unlock(sk); - if (copied) - break; - goto redo; + break; } } #endif -again: if (skb == NULL) { if (copied >= target) goto unlock; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index fa39b6265238..6433a414acf8 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -274,11 +274,22 @@ static void __unix_gc(struct work_struct *work) * receive queues. Other, non candidate sockets _can_ be * added to queue, so we must make sure only to touch * candidates. + * + * Embryos, though never candidates themselves, affect which + * candidates are reachable by the garbage collector. Before + * being added to a listener's queue, an embryo may already + * receive data carrying SCM_RIGHTS, potentially making the + * passed socket a candidate that is not yet reachable by the + * collector. It becomes reachable once the embryo is + * enqueued. Therefore, we must ensure that no SCM-laden + * embryo appears in a (candidate) listener's queue between + * consecutive scan_children() calls. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + struct sock *sk = &u->sk; long total_refs; - total_refs = file_count(u->sk.sk_socket->file); + total_refs = file_count(sk->sk_socket->file); WARN_ON_ONCE(!u->inflight); WARN_ON_ONCE(total_refs < u->inflight); @@ -286,6 +297,11 @@ static void __unix_gc(struct work_struct *work) list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + + if (sk->sk_state == TCP_LISTEN) { + unix_state_lock(sk); + unix_state_unlock(sk); + } } } diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 1748268e0694..ee5d306a96d0 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -120,7 +120,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) if (!skb) break; - virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); sgs = vsock->out_sgs; sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), @@ -170,6 +169,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) break; } + virtio_transport_deliver_tap_pkt(skb); + if (reply) { struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int val; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index e039e66ab377..cbbf347c6b2e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1024,7 +1024,7 @@ TRACE_EVENT(rdev_get_mpp, TRACE_EVENT(rdev_dump_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *mpp), - TP_ARGS(wiphy, netdev, _idx, mpp, dst), + TP_ARGS(wiphy, netdev, _idx, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index a161c64d1765..838ad6541a17 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -4,6 +4,7 @@ * Authors : Jean Tourrilhes - HPL - * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * Copyright 2009 Johannes Berg + * Copyright (C) 2024 Intel Corporation * * (As all part of the Linux kernel, this file is GPL) */ @@ -662,7 +663,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev) dev->ieee80211_ptr->wiphy->wext && dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) { wireless_warn_cfg80211_wext(); - if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) + if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO | + WIPHY_FLAG_DISABLE_WEXT)) return NULL; return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev); } @@ -704,7 +706,8 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd) #ifdef CONFIG_CFG80211_WEXT if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) { wireless_warn_cfg80211_wext(); - if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) + if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO | + WIPHY_FLAG_DISABLE_WEXT)) return NULL; handlers = dev->ieee80211_ptr->wiphy->wext; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3404d076a8a3..727aa20be4bd 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1417,6 +1417,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, struct xsk_queue **q; int entries; + if (optlen < sizeof(entries)) + return -EINVAL; if (copy_from_sockptr(&entries, optval, sizeof(entries))) return -EFAULT; diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 3ce5d503a6da..c5af566e911a 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -114,6 +114,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) +KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang + ifdef CONFIG_CC_IS_CLANG # Clang before clang-16 would warn on default argument promotions. ifneq ($(call clang-min-version, 160000),y) @@ -151,10 +153,6 @@ KBUILD_CFLAGS += -Wtype-limits KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) -ifdef CONFIG_CC_IS_CLANG -KBUILD_CFLAGS += -Winitializer-overrides -endif - KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 else @@ -164,9 +162,7 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value -ifdef CONFIG_CC_IS_CLANG -KBUILD_CFLAGS += -Wno-initializer-overrides -else +ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 8568d256d6fb..79fcf2731686 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -23,7 +23,7 @@ modname = $(notdir $(@:.mod.o=)) part-of-module = y quiet_cmd_cc_o_c = CC [M] $@ - cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV), $(c_flags)) -c -o $@ $< + cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV) $(CFLAGS_KCSAN), $(c_flags)) -c -o $@ $< %.mod.o: %.mod.c FORCE $(call if_changed_dep,cc_o_c) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 4606944984ee..c55878bddfdd 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -414,8 +414,8 @@ class PrinterRST(Printer): version = version.stdout.decode().rstrip() except: try: - version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot, - capture_output=True, check=True) + version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'], + cwd=linuxRoot, capture_output=True, check=True) version = version.stdout.decode().rstrip() except: return 'Linux' diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index c5c2ce113c92..d20c47d21ad8 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -467,6 +467,8 @@ static bool stackleak_gate(void) return false; if (STRING_EQUAL(section, ".entry.text")) return false; + if (STRING_EQUAL(section, ".head.text")) + return false; } return track_frame_size >= 0; diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index b5730061872b..965bb40c50e5 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -552,11 +552,6 @@ static int conf_choice(struct menu *menu) continue; } sym_set_tristate_value(child->sym, yes); - for (child = child->list; child; child = child->next) { - indent += 2; - conf(child); - indent -= 2; - } return 1; } } diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index e69d7c59d930..e7cc9e985c4f 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -89,7 +89,7 @@ void menu_add_visibility(struct expr *dep); struct property *menu_add_prompt(enum prop_type type, char *prompt, struct expr *dep); void menu_add_expr(enum prop_type type, struct expr *expr, struct expr *dep); void menu_add_symbol(enum prop_type type, struct symbol *sym, struct expr *dep); -void menu_finalize(struct menu *parent); +void menu_finalize(void); void menu_set_type(int type); extern struct menu rootmenu; diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c index 31d0a89fbeb7..75493302fb85 100644 --- a/scripts/kconfig/lxdialog/checklist.c +++ b/scripts/kconfig/lxdialog/checklist.c @@ -119,7 +119,7 @@ int dialog_checklist(const char *title, const char *prompt, int height, } do_resize: - if (getmaxy(stdscr) < (height + CHECKLIST_HEIGTH_MIN)) + if (getmaxy(stdscr) < (height + CHECKLIST_HEIGHT_MIN)) return -ERRDISPLAYTOOSMALL; if (getmaxx(stdscr) < (width + CHECKLIST_WIDTH_MIN)) return -ERRDISPLAYTOOSMALL; diff --git a/scripts/kconfig/lxdialog/dialog.h b/scripts/kconfig/lxdialog/dialog.h index 2d15ba893fbf..f6c2ebe6d1f9 100644 --- a/scripts/kconfig/lxdialog/dialog.h +++ b/scripts/kconfig/lxdialog/dialog.h @@ -162,17 +162,17 @@ int on_key_esc(WINDOW *win); int on_key_resize(void); /* minimum (re)size values */ -#define CHECKLIST_HEIGTH_MIN 6 /* For dialog_checklist() */ +#define CHECKLIST_HEIGHT_MIN 6 /* For dialog_checklist() */ #define CHECKLIST_WIDTH_MIN 6 -#define INPUTBOX_HEIGTH_MIN 2 /* For dialog_inputbox() */ +#define INPUTBOX_HEIGHT_MIN 2 /* For dialog_inputbox() */ #define INPUTBOX_WIDTH_MIN 2 -#define MENUBOX_HEIGTH_MIN 15 /* For dialog_menu() */ +#define MENUBOX_HEIGHT_MIN 15 /* For dialog_menu() */ #define MENUBOX_WIDTH_MIN 65 -#define TEXTBOX_HEIGTH_MIN 8 /* For dialog_textbox() */ +#define TEXTBOX_HEIGHT_MIN 8 /* For dialog_textbox() */ #define TEXTBOX_WIDTH_MIN 8 -#define YESNO_HEIGTH_MIN 4 /* For dialog_yesno() */ +#define YESNO_HEIGHT_MIN 4 /* For dialog_yesno() */ #define YESNO_WIDTH_MIN 4 -#define WINDOW_HEIGTH_MIN 19 /* For init_dialog() */ +#define WINDOW_HEIGHT_MIN 19 /* For init_dialog() */ #define WINDOW_WIDTH_MIN 80 int init_dialog(const char *backtitle); diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c index 1dcfb288ee63..3c6e24b20f5b 100644 --- a/scripts/kconfig/lxdialog/inputbox.c +++ b/scripts/kconfig/lxdialog/inputbox.c @@ -43,7 +43,7 @@ int dialog_inputbox(const char *title, const char *prompt, int height, int width strcpy(instr, init); do_resize: - if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGTH_MIN)) + if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGHT_MIN)) return -ERRDISPLAYTOOSMALL; if (getmaxx(stdscr) <= (width - INPUTBOX_WIDTH_MIN)) return -ERRDISPLAYTOOSMALL; diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c index 0e333284e947..6e6244df0c56 100644 --- a/scripts/kconfig/lxdialog/menubox.c +++ b/scripts/kconfig/lxdialog/menubox.c @@ -172,7 +172,7 @@ int dialog_menu(const char *title, const char *prompt, do_resize: height = getmaxy(stdscr); width = getmaxx(stdscr); - if (height < MENUBOX_HEIGTH_MIN || width < MENUBOX_WIDTH_MIN) + if (height < MENUBOX_HEIGHT_MIN || width < MENUBOX_WIDTH_MIN) return -ERRDISPLAYTOOSMALL; height -= 4; diff --git a/scripts/kconfig/lxdialog/textbox.c b/scripts/kconfig/lxdialog/textbox.c index 058ed0e5bbd5..0abaf635978f 100644 --- a/scripts/kconfig/lxdialog/textbox.c +++ b/scripts/kconfig/lxdialog/textbox.c @@ -175,7 +175,7 @@ int dialog_textbox(const char *title, const char *tbuf, int initial_height, do_resize: getmaxyx(stdscr, height, width); - if (height < TEXTBOX_HEIGTH_MIN || width < TEXTBOX_WIDTH_MIN) + if (height < TEXTBOX_HEIGHT_MIN || width < TEXTBOX_WIDTH_MIN) return -ERRDISPLAYTOOSMALL; if (initial_height != 0) height = initial_height; diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c index 3fb7508b68a2..f18e2a89f613 100644 --- a/scripts/kconfig/lxdialog/util.c +++ b/scripts/kconfig/lxdialog/util.c @@ -291,7 +291,7 @@ int init_dialog(const char *backtitle) getyx(stdscr, saved_y, saved_x); getmaxyx(stdscr, height, width); - if (height < WINDOW_HEIGTH_MIN || width < WINDOW_WIDTH_MIN) { + if (height < WINDOW_HEIGHT_MIN || width < WINDOW_WIDTH_MIN) { endwin(); return -ERRDISPLAYTOOSMALL; } diff --git a/scripts/kconfig/lxdialog/yesno.c b/scripts/kconfig/lxdialog/yesno.c index bcaac9b7bab2..b57d25e1549f 100644 --- a/scripts/kconfig/lxdialog/yesno.c +++ b/scripts/kconfig/lxdialog/yesno.c @@ -32,7 +32,7 @@ int dialog_yesno(const char *title, const char *prompt, int height, int width) WINDOW *dialog; do_resize: - if (getmaxy(stdscr) < (height + YESNO_HEIGTH_MIN)) + if (getmaxy(stdscr) < (height + YESNO_HEIGHT_MIN)) return -ERRDISPLAYTOOSMALL; if (getmaxx(stdscr) < (width + YESNO_WIDTH_MIN)) return -ERRDISPLAYTOOSMALL; diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index f4bb391d50cf..c0969097447d 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -659,9 +659,9 @@ static void conf_choice(struct menu *menu) dialog_clear(); res = dialog_checklist(prompt ? prompt : "Main Menu", radiolist_instructions, - MENUBOX_HEIGTH_MIN, + MENUBOX_HEIGHT_MIN, MENUBOX_WIDTH_MIN, - CHECKLIST_HEIGTH_MIN); + CHECKLIST_HEIGHT_MIN); selected = item_activate_selected(); switch (res) { case 0: diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 8498481e6afe..3b822cd110f4 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -282,7 +282,7 @@ static void sym_check_prop(struct symbol *sym) } } -void menu_finalize(struct menu *parent) +static void _menu_finalize(struct menu *parent, bool inside_choice) { struct menu *menu, *last_menu; struct symbol *sym; @@ -296,7 +296,12 @@ void menu_finalize(struct menu *parent) * and propagate parent dependencies before moving on. */ - if (sym && sym_is_choice(sym)) { + bool is_choice = false; + + if (sym && sym_is_choice(sym)) + is_choice = true; + + if (is_choice) { if (sym->type == S_UNKNOWN) { /* find the first choice value to find out choice type */ current_entry = parent; @@ -394,7 +399,7 @@ void menu_finalize(struct menu *parent) } } - if (sym && sym_is_choice(sym)) + if (is_choice) expr_free(parentdep); /* @@ -402,8 +407,8 @@ void menu_finalize(struct menu *parent) * moving on */ for (menu = parent->list; menu; menu = menu->next) - menu_finalize(menu); - } else if (sym) { + _menu_finalize(menu, is_choice); + } else if (!inside_choice && sym) { /* * Automatic submenu creation. If sym is a symbol and A, B, C, * ... are consecutive items (symbols, menus, ifs, etc.) that @@ -463,7 +468,7 @@ void menu_finalize(struct menu *parent) /* Superset, put in submenu */ expr_free(dep2); next: - menu_finalize(menu); + _menu_finalize(menu, false); menu->parent = parent; last_menu = menu; } @@ -582,6 +587,11 @@ void menu_finalize(struct menu *parent) } } +void menu_finalize(void) +{ + _menu_finalize(&rootmenu, false); +} + bool menu_has_prompt(struct menu *menu) { if (!menu->prompt) diff --git a/scripts/kconfig/parser.y b/scripts/kconfig/parser.y index b45bfaf0a02b..7fb996612c96 100644 --- a/scripts/kconfig/parser.y +++ b/scripts/kconfig/parser.y @@ -515,7 +515,7 @@ void conf_parse(const char *name) menu_add_prompt(P_MENU, "Main menu", NULL); } - menu_finalize(&rootmenu); + menu_finalize(); menu = &rootmenu; while (menu) { diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 967f1abb0edb..cb1be22afc65 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1541,7 +1541,7 @@ sub create_parameterlist($$$$) { save_struct_actual($2); push_parameter($2, "$type $1", $arg, $file, $declaration_name); - } elsif ($param =~ m/(.*?):(\d+)/) { + } elsif ($param =~ m/(.*?):(\w+)/) { if ($type ne "") { # skip unnamed bit-fields save_struct_actual($1); push_parameter($1, "$type:$2", $arg, $file, $declaration_name) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6b37039c9e92..2f5b91da5afa 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1007,6 +1007,8 @@ static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr, static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) { + Elf_Sym *new_sym; + /* If the supplied symbol has a valid name, return it */ if (is_valid_name(elf, sym)) return sym; @@ -1015,8 +1017,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) * Strive to find a better symbol name, but the resulting name may not * match the symbol referenced in the original code. */ - return symsearch_find_nearest(elf, addr, get_secindex(elf, sym), - true, 20); + new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym), + true, 20); + return new_sym ? new_sym : sym; } static bool is_executable_section(struct elf_info *elf, unsigned int secndx) diff --git a/security/security.c b/security/security.c index 7e118858b545..0a9a0ac3f266 100644 --- a/security/security.c +++ b/security/security.c @@ -1793,11 +1793,11 @@ int security_path_mknod(const struct path *dir, struct dentry *dentry, EXPORT_SYMBOL(security_path_mknod); /** - * security_path_post_mknod() - Update inode security field after file creation + * security_path_post_mknod() - Update inode security after reg file creation * @idmap: idmap of the mount * @dentry: new file * - * Update inode security field after a file has been created. + * Update inode security field after a regular file has been created. */ void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 0619a1cbbfbe..074d6c2714eb 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -2123,7 +2123,6 @@ static struct file_system_type sel_fs_type = { .kill_sb = sel_kill_sb, }; -static struct vfsmount *selinuxfs_mount __ro_after_init; struct path selinux_null __ro_after_init; static int __init init_sel_fs(void) @@ -2145,18 +2144,21 @@ static int __init init_sel_fs(void) return err; } - selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type); - if (IS_ERR(selinuxfs_mount)) { + selinux_null.mnt = kern_mount(&sel_fs_type); + if (IS_ERR(selinux_null.mnt)) { pr_err("selinuxfs: could not mount!\n"); - err = PTR_ERR(selinuxfs_mount); - selinuxfs_mount = NULL; + err = PTR_ERR(selinux_null.mnt); + selinux_null.mnt = NULL; + return err; } + selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root, &null_name); if (IS_ERR(selinux_null.dentry)) { pr_err("selinuxfs: could not lookup null!\n"); err = PTR_ERR(selinux_null.dentry); selinux_null.dentry = NULL; + return err; } return err; diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c index b8ff5cccd0c8..5431d2c49421 100644 --- a/sound/aoa/soundbus/i2sbus/core.c +++ b/sound/aoa/soundbus/i2sbus/core.c @@ -158,7 +158,7 @@ static int i2sbus_add_dev(struct macio_dev *macio, struct device_node *child, *sound = NULL; struct resource *r; int i, layout = 0, rlen, ok = force; - char node_name[6]; + char node_name[8]; static const char *rnames[] = { "i2sbus: %pOFn (control)", "i2sbus: %pOFn (tx)", "i2sbus: %pOFn (rx)" }; diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index b141024830ec..ee6ac649df83 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -428,7 +428,7 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest, midi1->note.group = midi2->note.group; midi1->note.status = midi2->note.status; midi1->note.channel = midi2->note.channel; - switch (midi2->note.status << 4) { + switch (midi2->note.status) { case UMP_MSG_STATUS_NOTE_ON: case UMP_MSG_STATUS_NOTE_OFF: midi1->note.note = midi2->note.note; diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 696a958d93e9..088cff799e0b 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -343,3 +343,29 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, return NULL; } EXPORT_SYMBOL(intel_nhlt_get_endpoint_blob); + +int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt, + u8 virtual_bus_id) +{ + struct nhlt_endpoint *epnt; + int i; + + if (!nhlt) + return -EINVAL; + + epnt = (struct nhlt_endpoint *)nhlt->desc; + for (i = 0; i < nhlt->endpoint_count; i++) { + /* for SSP link the virtual bus id is the SSP port number */ + if (epnt->linktype == NHLT_LINK_SSP && + epnt->virtual_bus_id == virtual_bus_id) { + dev_dbg(dev, "SSP%d: dev_type=%d\n", virtual_bus_id, + epnt->device_type); + return epnt->device_type; + } + + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); + } + + return -EINVAL; +} +EXPORT_SYMBOL(intel_nhlt_ssp_device_type); diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c index 0ba8f0c4cd99..3a593da09280 100644 --- a/sound/oss/dmasound/dmasound_paula.c +++ b/sound/oss/dmasound/dmasound_paula.c @@ -725,7 +725,13 @@ static void __exit amiga_audio_remove(struct platform_device *pdev) dmasound_deinit(); } -static struct platform_driver amiga_audio_driver = { +/* + * amiga_audio_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this is ok because they cannot get unbound at + * runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. + */ +static struct platform_driver amiga_audio_driver __refdata = { .remove_new = __exit_p(amiga_audio_remove), .driver = { .name = "amiga-audio", diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c index d36234b88fb4..941bfbf812ed 100644 --- a/sound/pci/emu10k1/emu10k1_callback.c +++ b/sound/pci/emu10k1/emu10k1_callback.c @@ -255,7 +255,7 @@ lookup_voices(struct snd_emux *emu, struct snd_emu10k1 *hw, /* check if sample is finished playing (non-looping only) */ if (bp != best + V_OFF && bp != best + V_FREE && (vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) { - val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64; + val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch); if (val >= vp->reg.loopstart) bp = best + V_OFF; } @@ -362,7 +362,7 @@ start_voice(struct snd_emux_voice *vp) map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); - addr = vp->reg.start + 64; + addr = vp->reg.start; temp = vp->reg.parm.filterQ; ccca = (temp << 28) | addr; if (vp->apitch < 0xe400) @@ -430,9 +430,6 @@ start_voice(struct snd_emux_voice *vp) /* Q & current address (Q 4bit value, MSB) */ CCCA, ccca, - /* cache */ - CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64), - /* reset volume */ VTFT, vtarget | vp->ftarget, CVCF, vtarget | CVCF_CURRENTFILTER_MASK, diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 72ec872afb8d..8fb688e41414 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -108,7 +108,10 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 }, { "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, + { "10433A60", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, { "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 }, + { "17AA3877", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, + { "17AA3878", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38A9", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, { "17AA38AB", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, { "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, @@ -496,7 +499,10 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431F12", generic_dsd_config }, { "CSC3551", "10431F1F", generic_dsd_config }, { "CSC3551", "10431F62", generic_dsd_config }, + { "CSC3551", "10433A60", generic_dsd_config }, { "CSC3551", "17AA386F", generic_dsd_config }, + { "CSC3551", "17AA3877", generic_dsd_config }, + { "CSC3551", "17AA3878", generic_dsd_config }, { "CSC3551", "17AA38A9", generic_dsd_config }, { "CSC3551", "17AA38AB", generic_dsd_config }, { "CSC3551", "17AA38B4", generic_dsd_config }, diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 41974b3897a7..1a3f84599cb5 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -1024,8 +1024,8 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) goto err; } - dev_dbg(cs35l56->base.dev, "DSP system name: '%s', amp name: '%s'\n", - cs35l56->system_name, cs35l56->amp_name); + dev_info(cs35l56->base.dev, "DSP system name: '%s', amp name: '%s'\n", + cs35l56->system_name, cs35l56->amp_name); regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config, ARRAY_SIZE(cs35l56_hda_dai_config)); @@ -1045,14 +1045,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) pm_runtime_mark_last_busy(cs35l56->base.dev); pm_runtime_enable(cs35l56->base.dev); + cs35l56->base.init_done = true; + ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops); if (ret) { dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret); goto pm_err; } - cs35l56->base.init_done = true; - return 0; pm_err: diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c index 13beee807308..40f2f97944d5 100644 --- a/sound/pci/hda/cs35l56_hda_i2c.c +++ b/sound/pci/hda/cs35l56_hda_i2c.c @@ -56,10 +56,19 @@ static const struct i2c_device_id cs35l56_hda_i2c_id[] = { {} }; +static const struct acpi_device_id cs35l56_acpi_hda_match[] = { + { "CSC3554", 0 }, + { "CSC3556", 0 }, + { "CSC3557", 0 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match); + static struct i2c_driver cs35l56_hda_i2c_driver = { .driver = { - .name = "cs35l56-hda", - .pm = &cs35l56_hda_pm_ops, + .name = "cs35l56-hda", + .acpi_match_table = cs35l56_acpi_hda_match, + .pm = &cs35l56_hda_pm_ops, }, .id_table = cs35l56_hda_i2c_id, .probe = cs35l56_hda_i2c_probe, diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c index a3b2fa76663d..7f02155fe61e 100644 --- a/sound/pci/hda/cs35l56_hda_spi.c +++ b/sound/pci/hda/cs35l56_hda_spi.c @@ -56,10 +56,19 @@ static const struct spi_device_id cs35l56_hda_spi_id[] = { {} }; +static const struct acpi_device_id cs35l56_acpi_hda_match[] = { + { "CSC3554", 0 }, + { "CSC3556", 0 }, + { "CSC3557", 0 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match); + static struct spi_driver cs35l56_hda_spi_driver = { .driver = { - .name = "cs35l56-hda", - .pm = &cs35l56_hda_pm_ops, + .name = "cs35l56-hda", + .acpi_match_table = cs35l56_acpi_hda_match, + .pm = &cs35l56_hda_pm_ops, }, .id_table = cs35l56_hda_spi_id, .probe = cs35l56_hda_spi_probe, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a17c36a36aa5..70d80b6af3fe 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6875,11 +6875,38 @@ static void alc287_fixup_legion_16ithg6_speakers(struct hda_codec *cdc, const st comp_generic_fixup(cdc, action, "i2c", "CLSA0101", "-%s:00-cs35l41-hda.%d", 2); } +static void cs35l56_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) +{ + comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 2); +} + +static void cs35l56_fixup_i2c_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action) +{ + comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 4); +} + +static void cs35l56_fixup_spi_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) +{ + comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 2); +} + static void cs35l56_fixup_spi_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action) { comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 4); } +static void alc285_fixup_asus_ga403u(struct hda_codec *cdc, const struct hda_fixup *fix, int action) +{ + /* + * The same SSID has been re-used in different hardware, they have + * different codecs and the newer GA403U has a ALC285. + */ + if (cdc->core.vendor_id == 0x10ec0285) + cs35l56_fixup_i2c_two(cdc, fix, action); + else + alc_fixup_inv_dmic(cdc, fix, action); +} + static void tas2781_fixup_i2c(struct hda_codec *cdc, const struct hda_fixup *fix, int action) { @@ -7436,6 +7463,14 @@ enum { ALC256_FIXUP_ACER_SFG16_MICMUTE_LED, ALC256_FIXUP_HEADPHONE_AMP_VOL, ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX, + ALC285_FIXUP_CS35L56_SPI_2, + ALC285_FIXUP_CS35L56_I2C_2, + ALC285_FIXUP_CS35L56_I2C_4, + ALC285_FIXUP_ASUS_GA403U, + ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC, + ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1, + ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC, + ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1 }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9643,6 +9678,54 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc245_fixup_hp_spectre_x360_eu0xxx, }, + [ALC285_FIXUP_CS35L56_SPI_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l56_fixup_spi_two, + }, + [ALC285_FIXUP_CS35L56_I2C_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l56_fixup_i2c_two, + }, + [ALC285_FIXUP_CS35L56_I2C_4] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l56_fixup_i2c_four, + }, + [ALC285_FIXUP_ASUS_GA403U] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_asus_ga403u, + }, + [ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a11050 }, + { 0x1b, 0x03a11c30 }, + { } + }, + .chained = true, + .chain_id = ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1 + }, + [ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC, + }, + [ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a11050 }, + { 0x1b, 0x03a11c30 }, + { } + }, + .chained = true, + .chain_id = ALC285_FIXUP_CS35L56_SPI_2 + }, + [ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC285_FIXUP_ASUS_GA403U, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -10037,6 +10120,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), @@ -10096,7 +10181,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), - SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2), @@ -10104,6 +10189,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), @@ -10115,11 +10201,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606", ALC285_FIXUP_CS35L56_I2C_4), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), + SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_CS35L56_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_CS35L56_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401), @@ -10133,7 +10222,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), @@ -10159,7 +10248,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x12f6, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), - SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), + SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), @@ -10177,6 +10266,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x152d, 0x1262, "Huawei NBLB-WAX9N", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x0353, "Clevo V35[05]SN[CDE]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1325, "Clevo N15[01][CW]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -10282,6 +10372,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), + SND_PCI_QUIRK(0x17aa, 0x2234, "Thinkpad ICE-1", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), @@ -10333,6 +10424,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), + SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x3878, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual power mode2 YC", ALC287_FIXUP_TAS2781_I2C), @@ -10341,8 +10434,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), @@ -10403,6 +10496,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 4475cea8e9f7..75f7674c66ee 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -89,7 +89,7 @@ struct tas2781_hda { struct snd_kcontrol *dsp_prog_ctl; struct snd_kcontrol *dsp_conf_ctl; struct snd_kcontrol *prof_ctl; - struct snd_kcontrol *snd_ctls[3]; + struct snd_kcontrol *snd_ctls[2]; }; static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data) @@ -161,8 +161,6 @@ static void tas2781_hda_playback_hook(struct device *dev, int action) pm_runtime_put_autosuspend(dev); break; default: - dev_dbg(tas_hda->dev, "Playback action not supported: %d\n", - action); break; } } @@ -185,8 +183,15 @@ static int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol, { struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas_priv->codec_lock); + ucontrol->value.integer.value[0] = tas_priv->rcabin.profile_cfg_id; + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n", + __func__, kcontrol->id.name, tas_priv->rcabin.profile_cfg_id); + + mutex_unlock(&tas_priv->codec_lock); + return 0; } @@ -200,11 +205,19 @@ static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol, val = clamp(nr_profile, 0, max); + mutex_lock(&tas_priv->codec_lock); + + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n", + __func__, kcontrol->id.name, + tas_priv->rcabin.profile_cfg_id, val); + if (tas_priv->rcabin.profile_cfg_id != val) { tas_priv->rcabin.profile_cfg_id = val; ret = 1; } + mutex_unlock(&tas_priv->codec_lock); + return ret; } @@ -241,8 +254,15 @@ static int tasdevice_program_get(struct snd_kcontrol *kcontrol, { struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas_priv->codec_lock); + ucontrol->value.integer.value[0] = tas_priv->cur_prog; + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n", + __func__, kcontrol->id.name, tas_priv->cur_prog); + + mutex_unlock(&tas_priv->codec_lock); + return 0; } @@ -257,11 +277,18 @@ static int tasdevice_program_put(struct snd_kcontrol *kcontrol, val = clamp(nr_program, 0, max); + mutex_lock(&tas_priv->codec_lock); + + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n", + __func__, kcontrol->id.name, tas_priv->cur_prog, val); + if (tas_priv->cur_prog != val) { tas_priv->cur_prog = val; ret = 1; } + mutex_unlock(&tas_priv->codec_lock); + return ret; } @@ -270,8 +297,15 @@ static int tasdevice_config_get(struct snd_kcontrol *kcontrol, { struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas_priv->codec_lock); + ucontrol->value.integer.value[0] = tas_priv->cur_conf; + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n", + __func__, kcontrol->id.name, tas_priv->cur_conf); + + mutex_unlock(&tas_priv->codec_lock); + return 0; } @@ -286,54 +320,39 @@ static int tasdevice_config_put(struct snd_kcontrol *kcontrol, val = clamp(nr_config, 0, max); + mutex_lock(&tas_priv->codec_lock); + + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n", + __func__, kcontrol->id.name, tas_priv->cur_conf, val); + if (tas_priv->cur_conf != val) { tas_priv->cur_conf = val; ret = 1; } + mutex_unlock(&tas_priv->codec_lock); + return ret; } -/* - * tas2781_digital_getvol - get the volum control - * @kcontrol: control pointer - * @ucontrol: User data - * Customer Kcontrol for tas2781 is primarily for regmap booking, paging - * depends on internal regmap mechanism. - * tas2781 contains book and page two-level register map, especially - * book switching will set the register BXXP00R7F, after switching to the - * correct book, then leverage the mechanism for paging to access the - * register. - */ -static int tas2781_digital_getvol(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); - struct soc_mixer_control *mc = - (struct soc_mixer_control *)kcontrol->private_value; - - return tasdevice_digital_getvol(tas_priv, ucontrol, mc); -} - static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; + int ret; - return tasdevice_amp_getvol(tas_priv, ucontrol, mc); -} + mutex_lock(&tas_priv->codec_lock); -static int tas2781_digital_putvol(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); - struct soc_mixer_control *mc = - (struct soc_mixer_control *)kcontrol->private_value; + ret = tasdevice_amp_getvol(tas_priv, ucontrol, mc); - /* The check of the given value is in tasdevice_digital_putvol. */ - return tasdevice_digital_putvol(tas_priv, ucontrol, mc); + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %ld\n", + __func__, kcontrol->id.name, ucontrol->value.integer.value[0]); + + mutex_unlock(&tas_priv->codec_lock); + + return ret; } static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol, @@ -342,9 +361,19 @@ static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol, struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; + int ret; + + mutex_lock(&tas_priv->codec_lock); + + dev_dbg(tas_priv->dev, "%s: kcontrol %s: -> %ld\n", + __func__, kcontrol->id.name, ucontrol->value.integer.value[0]); /* The check of the given value is in tasdevice_amp_putvol. */ - return tasdevice_amp_putvol(tas_priv, ucontrol, mc); + ret = tasdevice_amp_putvol(tas_priv, ucontrol, mc); + + mutex_unlock(&tas_priv->codec_lock); + + return ret; } static int tas2781_force_fwload_get(struct snd_kcontrol *kcontrol, @@ -352,9 +381,13 @@ static int tas2781_force_fwload_get(struct snd_kcontrol *kcontrol, { struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas_priv->codec_lock); + ucontrol->value.integer.value[0] = (int)tas_priv->force_fwload_status; - dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__, - tas_priv->force_fwload_status ? "ON" : "OFF"); + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n", + __func__, kcontrol->id.name, tas_priv->force_fwload_status); + + mutex_unlock(&tas_priv->codec_lock); return 0; } @@ -365,14 +398,20 @@ static int tas2781_force_fwload_put(struct snd_kcontrol *kcontrol, struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol); bool change, val = (bool)ucontrol->value.integer.value[0]; + mutex_lock(&tas_priv->codec_lock); + + dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n", + __func__, kcontrol->id.name, + tas_priv->force_fwload_status, val); + if (tas_priv->force_fwload_status == val) change = false; else { change = true; tas_priv->force_fwload_status = val; } - dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__, - tas_priv->force_fwload_status ? "ON" : "OFF"); + + mutex_unlock(&tas_priv->codec_lock); return change; } @@ -381,9 +420,6 @@ static const struct snd_kcontrol_new tas2781_snd_controls[] = { ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, tas2781_amp_putvol, amp_vol_tlv), - ACARD_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain", TAS2781_DVC_LVL, - 0, 0, 200, 1, tas2781_digital_getvol, - tas2781_digital_putvol, dvc_tlv), ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0, tas2781_force_fwload_get, tas2781_force_fwload_put), }; @@ -478,10 +514,10 @@ static int tas2563_save_calibration(struct tasdevice_priv *tas_priv) static void tas2781_apply_calib(struct tasdevice_priv *tas_priv) { static const unsigned char page_array[CALIB_MAX] = { - 0x17, 0x18, 0x18, 0x0d, 0x18 + 0x17, 0x18, 0x18, 0x13, 0x18, }; static const unsigned char rgno_array[CALIB_MAX] = { - 0x74, 0x0c, 0x14, 0x3c, 0x7c + 0x74, 0x0c, 0x14, 0x70, 0x7c, }; unsigned char *data; int i, j, rc; diff --git a/sound/sh/aica.c b/sound/sh/aica.c index 320ac792c7fe..3182c634464d 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -278,7 +278,8 @@ static void run_spu_dma(struct work_struct *work) dreamcastcard->clicks++; if (unlikely(dreamcastcard->clicks >= AICA_PERIOD_NUMBER)) dreamcastcard->clicks %= AICA_PERIOD_NUMBER; - mod_timer(&dreamcastcard->timer, jiffies + 1); + if (snd_pcm_running(dreamcastcard->substream)) + mod_timer(&dreamcastcard->timer, jiffies + 1); } } @@ -290,6 +291,8 @@ static void aica_period_elapsed(struct timer_list *t) /*timer function - so cannot sleep */ int play_period; struct snd_pcm_runtime *runtime; + if (!snd_pcm_running(substream)) + return; runtime = substream->runtime; dreamcastcard = substream->pcm->private_data; /* Have we played out an additional period? */ @@ -350,12 +353,19 @@ static int snd_aicapcm_pcm_open(struct snd_pcm_substream return 0; } +static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct snd_card_aica *dreamcastcard = substream->pcm->private_data; + + del_timer_sync(&dreamcastcard->timer); + cancel_work_sync(&dreamcastcard->spu_dma_work); + return 0; +} + static int snd_aicapcm_pcm_close(struct snd_pcm_substream *substream) { struct snd_card_aica *dreamcastcard = substream->pcm->private_data; - flush_work(&(dreamcastcard->spu_dma_work)); - del_timer(&dreamcastcard->timer); dreamcastcard->substream = NULL; kfree(dreamcastcard->channel); spu_disable(); @@ -401,6 +411,7 @@ static const struct snd_pcm_ops snd_aicapcm_playback_ops = { .prepare = snd_aicapcm_pcm_prepare, .trigger = snd_aicapcm_pcm_trigger, .pointer = snd_aicapcm_pcm_pointer, + .sync_stop = snd_aicapcm_pcm_sync_stop, }; /* TO DO: set up to handle more than one pcm instance */ diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c index 8c8b1dcac628..5f35b90eab8d 100644 --- a/sound/soc/amd/acp/acp-pci.c +++ b/sound/soc/amd/acp/acp-pci.c @@ -115,7 +115,10 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id goto unregister_dmic_dev; } - acp_init(chip); + ret = acp_init(chip); + if (ret) + goto unregister_dmic_dev; + res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL); if (!res) { ret = -ENOMEM; @@ -133,11 +136,9 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id } } - if (flag == FLAG_AMD_LEGACY_ONLY_DMIC) { - ret = check_acp_pdm(pci, chip); - if (ret < 0) - goto skip_pdev_creation; - } + ret = check_acp_pdm(pci, chip); + if (ret < 0) + goto skip_pdev_creation; chip->flag = flag; memset(&pdevinfo, 0, sizeof(pdevinfo)); diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index 01ef4db5407d..287ac01a3873 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -56,6 +56,11 @@ static int _cs_amp_write_cal_coeffs(struct cs_dsp *dsp, dev_dbg(dsp->dev, "Calibration: Ambient=%#x, Status=%#x, CalR=%d\n", data->calAmbient, data->calStatus, data->calR); + if (list_empty(&dsp->ctl_list)) { + dev_info(dsp->dev, "Calibration disabled due to missing firmware controls\n"); + return -ENOENT; + } + ret = cs_amp_write_cal_coeff(dsp, controls, controls->ambient, data->calAmbient); if (ret) return ret; diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 860d5cda67bf..94685449f0f4 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2364,7 +2364,8 @@ static int cs42l43_codec_runtime_resume(struct device *dev) static int cs42l43_codec_suspend(struct device *dev) { - struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + struct cs42l43_codec *priv = dev_get_drvdata(dev); + struct cs42l43 *cs42l43 = priv->core; disable_irq(cs42l43->irq); @@ -2373,7 +2374,8 @@ static int cs42l43_codec_suspend(struct device *dev) static int cs42l43_codec_suspend_noirq(struct device *dev) { - struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + struct cs42l43_codec *priv = dev_get_drvdata(dev); + struct cs42l43 *cs42l43 = priv->core; enable_irq(cs42l43->irq); @@ -2382,7 +2384,8 @@ static int cs42l43_codec_suspend_noirq(struct device *dev) static int cs42l43_codec_resume(struct device *dev) { - struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + struct cs42l43_codec *priv = dev_get_drvdata(dev); + struct cs42l43 *cs42l43 = priv->core; enable_irq(cs42l43->irq); @@ -2391,7 +2394,8 @@ static int cs42l43_codec_resume(struct device *dev) static int cs42l43_codec_resume_noirq(struct device *dev) { - struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + struct cs42l43_codec *priv = dev_get_drvdata(dev); + struct cs42l43 *cs42l43 = priv->core; disable_irq(cs42l43->irq); diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 15289dadafea..17bd6b516077 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -412,9 +412,9 @@ static const struct _coeff_div coeff_div_v3[] = { {125, 48000, 6000000, 0x04, 0x04, 0x1F, 0x2D, 0x8A, 0x0A, 0x27, 0x27}, {128, 8000, 1024000, 0x60, 0x00, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F}, - {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x3F}, - {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F}, - {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F}, + {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x3F}, + {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F}, + {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F}, {144, 8000, 1152000, 0x20, 0x00, 0x03, 0x35, 0x8A, 0x1B, 0x23, 0x47}, {144, 16000, 2304000, 0x20, 0x00, 0x11, 0x35, 0x8A, 0x1B, 0x23, 0x47}, {192, 8000, 1536000, 0x60, 0x02, 0x0D, 0x75, 0x8A, 0x1B, 0x1F, 0x7F}, @@ -423,10 +423,10 @@ static const struct _coeff_div coeff_div_v3[] = { {200, 48000, 9600000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F}, {250, 48000, 12000000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x27, 0x27}, - {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x7F}, - {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F}, - {256, 44100, 11289600, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F}, - {256, 48000, 12288000, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F}, + {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x7F}, + {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x08, 0x19, 0x1F, 0x3F}, + {256, 44100, 11289600, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F}, + {256, 48000, 12288000, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F}, {288, 8000, 2304000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x23, 0x47}, {384, 8000, 3072000, 0x60, 0x02, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F}, {384, 16000, 6144000, 0x20, 0x02, 0x03, 0x35, 0x8A, 0x1B, 0x1F, 0x3F}, @@ -435,10 +435,10 @@ static const struct _coeff_div coeff_div_v3[] = { {400, 48000, 19200000, 0xE4, 0x04, 0x35, 0x6d, 0xCA, 0x0A, 0x1F, 0x1F}, {500, 48000, 24000000, 0xF8, 0x04, 0x3F, 0x6D, 0xCA, 0x0A, 0x1F, 0x1F}, - {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x7F}, - {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x8A, 0x1B, 0x1F, 0x3F}, - {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F}, - {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F}, + {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x08, 0x19, 0x1B, 0x1F, 0x7F}, + {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x08, 0x19, 0x1F, 0x3F}, + {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F}, + {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F}, {768, 8000, 6144000, 0x60, 0x02, 0x11, 0x35, 0x8A, 0x1B, 0x1F, 0x7F}, {768, 16000, 12288000, 0x20, 0x02, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F}, {768, 32000, 24576000, 0xE0, 0x02, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F}, @@ -835,7 +835,6 @@ static void es8326_jack_detect_handler(struct work_struct *work) dev_dbg(comp->dev, "Report hp remove event\n"); snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET); /* mute adc when mic path switch */ - regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33); regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44); regmap_write(es8326->regmap, ES8326_ADC2_SRC, 0x66); es8326->hp = 0; @@ -843,6 +842,7 @@ static void es8326_jack_detect_handler(struct work_struct *work) regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a); regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03); + regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9); /* * Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event */ @@ -865,6 +865,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) * set auto-check mode, then restart jack_detect_work after 400ms. * Don't report jack status. */ + regmap_write(es8326->regmap, ES8326_INT_SOURCE, + (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); es8326_enable_micbias(es8326->component); usleep_range(50000, 70000); @@ -891,7 +893,6 @@ static void es8326_jack_detect_handler(struct work_struct *work) snd_soc_jack_report(es8326->jack, SND_JACK_HEADSET, SND_JACK_HEADSET); - regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33); regmap_update_bits(es8326->regmap, ES8326_PGA_PDN, 0x08, 0x08); regmap_update_bits(es8326->regmap, ES8326_PGAGAIN, @@ -987,7 +988,7 @@ static int es8326_resume(struct snd_soc_component *component) regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E); regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0); usleep_range(10000, 15000); - regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9); + regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9); regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb); /* set headphone default type and detect pin */ regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83); @@ -1038,8 +1039,7 @@ static int es8326_resume(struct snd_soc_component *component) es8326_enable_micbias(es8326->component); usleep_range(50000, 70000); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); - regmap_write(es8326->regmap, ES8326_INT_SOURCE, - (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); + regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9); regmap_write(es8326->regmap, ES8326_INTOUT_IO, es8326->interrupt_clk); regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, @@ -1060,6 +1060,8 @@ static int es8326_resume(struct snd_soc_component *component) es8326->hp = 0; es8326->hpl_vol = 0x03; es8326->hpr_vol = 0x03; + + es8326_irq(es8326->irq, es8326); return 0; } @@ -1070,6 +1072,9 @@ static int es8326_suspend(struct snd_soc_component *component) cancel_delayed_work_sync(&es8326->jack_detect_work); es8326_disable_micbias(component); es8326->calibrated = false; + regmap_write(es8326->regmap, ES8326_CLK_MUX, 0x2d); + regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x00); + regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b); regmap_write(es8326->regmap, ES8326_CLK_CTL, ES8326_CLK_OFF); regcache_cache_only(es8326->regmap, true); regcache_mark_dirty(es8326->regmap); diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h index ee12caef8105..c3e52e7bdef5 100644 --- a/sound/soc/codecs/es8326.h +++ b/sound/soc/codecs/es8326.h @@ -104,7 +104,7 @@ #define ES8326_MUTE (3 << 0) /* ES8326_CLK_CTL */ -#define ES8326_CLK_ON (0x7e << 0) +#define ES8326_CLK_ON (0x7f << 0) #define ES8326_CLK_OFF (0 << 0) /* ES8326_CLK_INV */ diff --git a/sound/soc/codecs/rt1316-sdw.c b/sound/soc/codecs/rt1316-sdw.c index 47511f70119a..0b3bf920bcab 100644 --- a/sound/soc/codecs/rt1316-sdw.c +++ b/sound/soc/codecs/rt1316-sdw.c @@ -537,7 +537,7 @@ static int rt1316_sdw_hw_params(struct snd_pcm_substream *substream, retval = sdw_stream_add_slave(rt1316->sdw_slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } @@ -577,12 +577,12 @@ static int rt1316_sdw_parse_dt(struct rt1316_sdw_priv *rt1316, struct device *de if (rt1316->bq_params_cnt) { rt1316->bq_params = devm_kzalloc(dev, rt1316->bq_params_cnt, GFP_KERNEL); if (!rt1316->bq_params) { - dev_err(dev, "Could not allocate bq_params memory\n"); + dev_err(dev, "%s: Could not allocate bq_params memory\n", __func__); ret = -ENOMEM; } else { ret = device_property_read_u8_array(dev, "realtek,bq-params", rt1316->bq_params, rt1316->bq_params_cnt); if (ret < 0) - dev_err(dev, "Could not read list of realtek,bq-params\n"); + dev_err(dev, "%s: Could not read list of realtek,bq-params\n", __func__); } } @@ -759,7 +759,7 @@ static int __maybe_unused rt1316_dev_resume(struct device *dev) time = wait_for_completion_timeout(&slave->initialization_complete, msecs_to_jiffies(RT1316_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Initialization not complete, timed out\n"); + dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__); sdw_show_ping_status(slave->bus, true); return -ETIMEDOUT; diff --git a/sound/soc/codecs/rt1318-sdw.c b/sound/soc/codecs/rt1318-sdw.c index ff364bde4a08..462c9a4b1be5 100644 --- a/sound/soc/codecs/rt1318-sdw.c +++ b/sound/soc/codecs/rt1318-sdw.c @@ -606,7 +606,7 @@ static int rt1318_sdw_hw_params(struct snd_pcm_substream *substream, retval = sdw_stream_add_slave(rt1318->sdw_slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } @@ -631,8 +631,8 @@ static int rt1318_sdw_hw_params(struct snd_pcm_substream *substream, sampling_rate = RT1318_SDCA_RATE_192000HZ; break; default: - dev_err(component->dev, "Rate %d is not supported\n", - params_rate(params)); + dev_err(component->dev, "%s: Rate %d is not supported\n", + __func__, params_rate(params)); return -EINVAL; } @@ -835,7 +835,7 @@ static int __maybe_unused rt1318_dev_resume(struct device *dev) time = wait_for_completion_timeout(&slave->initialization_complete, msecs_to_jiffies(RT1318_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Initialization not complete, timed out\n"); + dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__); return -ETIMEDOUT; } diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c index e67c2e19cb1a..f9ee42c13dba 100644 --- a/sound/soc/codecs/rt5682-sdw.c +++ b/sound/soc/codecs/rt5682-sdw.c @@ -132,7 +132,7 @@ static int rt5682_sdw_hw_params(struct snd_pcm_substream *substream, retval = sdw_stream_add_slave(rt5682->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } @@ -315,8 +315,8 @@ static int rt5682_sdw_init(struct device *dev, struct regmap *regmap, &rt5682_sdw_indirect_regmap); if (IS_ERR(rt5682->regmap)) { ret = PTR_ERR(rt5682->regmap); - dev_err(dev, "Failed to allocate register map: %d\n", - ret); + dev_err(dev, "%s: Failed to allocate register map: %d\n", + __func__, ret); return ret; } @@ -400,7 +400,7 @@ static int rt5682_io_init(struct device *dev, struct sdw_slave *slave) } if (val != DEVICE_ID) { - dev_err(dev, "Device with ID register %x is not rt5682\n", val); + dev_err(dev, "%s: Device with ID register %x is not rt5682\n", __func__, val); ret = -ENODEV; goto err_nodev; } @@ -648,7 +648,7 @@ static int rt5682_bus_config(struct sdw_slave *slave, ret = rt5682_clock_config(&slave->dev); if (ret < 0) - dev_err(&slave->dev, "Invalid clk config"); + dev_err(&slave->dev, "%s: Invalid clk config", __func__); return ret; } @@ -763,19 +763,19 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt5682->disable_irq_lock); if (rt5682->disable_irq == true) { - mutex_lock(&rt5682->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); rt5682->disable_irq = false; - mutex_unlock(&rt5682->disable_irq_lock); } + mutex_unlock(&rt5682->disable_irq_lock); goto regmap_sync; } time = wait_for_completion_timeout(&slave->initialization_complete, msecs_to_jiffies(RT5682_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Initialization not complete, timed out\n"); + dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__); sdw_show_ping_status(slave->bus, true); return -ETIMEDOUT; diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c index 0ebf344a1b60..434b926f96c8 100644 --- a/sound/soc/codecs/rt700.c +++ b/sound/soc/codecs/rt700.c @@ -37,8 +37,8 @@ static int rt700_index_write(struct regmap *regmap, ret = regmap_write(regmap, addr, value); if (ret < 0) - pr_err("Failed to set private value: %06x <= %04x ret=%d\n", - addr, value, ret); + pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n", + __func__, addr, value, ret); return ret; } @@ -52,8 +52,8 @@ static int rt700_index_read(struct regmap *regmap, *value = 0; ret = regmap_read(regmap, addr, value); if (ret < 0) - pr_err("Failed to get private value: %06x => %04x ret=%d\n", - addr, *value, ret); + pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n", + __func__, addr, *value, ret); return ret; } @@ -930,14 +930,14 @@ static int rt700_pcm_hw_params(struct snd_pcm_substream *substream, port_config.num += 2; break; default: - dev_err(component->dev, "Invalid DAI id %d\n", dai->id); + dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id); return -EINVAL; } retval = sdw_stream_add_slave(rt700->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } @@ -945,8 +945,8 @@ static int rt700_pcm_hw_params(struct snd_pcm_substream *substream, /* bit 3:0 Number of Channel */ val |= (params_channels(params) - 1); } else { - dev_err(component->dev, "Unsupported channels %d\n", - params_channels(params)); + dev_err(component->dev, "%s: Unsupported channels %d\n", + __func__, params_channels(params)); return -EINVAL; } diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index 935e597022d3..2636c2eea4bc 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -438,20 +438,20 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt711->disable_irq_lock); if (rt711->disable_irq == true) { - mutex_lock(&rt711->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); rt711->disable_irq = false; - mutex_unlock(&rt711->disable_irq_lock); } + mutex_unlock(&rt711->disable_irq_lock); goto regmap_sync; } time = wait_for_completion_timeout(&slave->initialization_complete, msecs_to_jiffies(RT711_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Initialization not complete, timed out\n"); + dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__); sdw_show_ping_status(slave->bus, true); return -ETIMEDOUT; diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index 447154cb6010..1e8dbfc3ecd9 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -36,8 +36,8 @@ static int rt711_sdca_index_write(struct rt711_sdca_priv *rt711, ret = regmap_write(regmap, addr, value); if (ret < 0) dev_err(&rt711->slave->dev, - "Failed to set private value: %06x <= %04x ret=%d\n", - addr, value, ret); + "%s: Failed to set private value: %06x <= %04x ret=%d\n", + __func__, addr, value, ret); return ret; } @@ -52,8 +52,8 @@ static int rt711_sdca_index_read(struct rt711_sdca_priv *rt711, ret = regmap_read(regmap, addr, value); if (ret < 0) dev_err(&rt711->slave->dev, - "Failed to get private value: %06x => %04x ret=%d\n", - addr, *value, ret); + "%s: Failed to get private value: %06x => %04x ret=%d\n", + __func__, addr, *value, ret); return ret; } @@ -1293,13 +1293,13 @@ static int rt711_sdca_pcm_hw_params(struct snd_pcm_substream *substream, retval = sdw_stream_add_slave(rt711->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } if (params_channels(params) > 16) { - dev_err(component->dev, "Unsupported channels %d\n", - params_channels(params)); + dev_err(component->dev, "%s: Unsupported channels %d\n", + __func__, params_channels(params)); return -EINVAL; } @@ -1318,8 +1318,8 @@ static int rt711_sdca_pcm_hw_params(struct snd_pcm_substream *substream, sampling_rate = RT711_SDCA_RATE_192000HZ; break; default: - dev_err(component->dev, "Rate %d is not supported\n", - params_rate(params)); + dev_err(component->dev, "%s: Rate %d is not supported\n", + __func__, params_rate(params)); return -EINVAL; } diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index 3f5773310ae8..0d3b43dd22e6 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -408,7 +408,7 @@ static int rt711_bus_config(struct sdw_slave *slave, ret = rt711_clock_config(&slave->dev); if (ret < 0) - dev_err(&slave->dev, "Invalid clk config"); + dev_err(&slave->dev, "%s: Invalid clk config", __func__); return ret; } @@ -536,19 +536,19 @@ static int __maybe_unused rt711_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt711->disable_irq_lock); if (rt711->disable_irq == true) { - mutex_lock(&rt711->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); rt711->disable_irq = false; - mutex_unlock(&rt711->disable_irq_lock); } + mutex_unlock(&rt711->disable_irq_lock); goto regmap_sync; } time = wait_for_completion_timeout(&slave->initialization_complete, msecs_to_jiffies(RT711_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Initialization not complete, timed out\n"); + dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__); return -ETIMEDOUT; } diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 66eaed13b0d6..5446f9506a16 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -37,8 +37,8 @@ static int rt711_index_write(struct regmap *regmap, ret = regmap_write(regmap, addr, value); if (ret < 0) - pr_err("Failed to set private value: %06x <= %04x ret=%d\n", - addr, value, ret); + pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n", + __func__, addr, value, ret); return ret; } @@ -52,8 +52,8 @@ static int rt711_index_read(struct regmap *regmap, *value = 0; ret = regmap_read(regmap, addr, value); if (ret < 0) - pr_err("Failed to get private value: %06x => %04x ret=%d\n", - addr, *value, ret); + pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n", + __func__, addr, *value, ret); return ret; } @@ -428,7 +428,7 @@ static void rt711_jack_init(struct rt711_priv *rt711) RT711_HP_JD_FINAL_RESULT_CTL_JD12); break; default: - dev_warn(rt711->component->dev, "Wrong JD source\n"); + dev_warn(rt711->component->dev, "%s: Wrong JD source\n", __func__); break; } @@ -1020,7 +1020,7 @@ static int rt711_pcm_hw_params(struct snd_pcm_substream *substream, retval = sdw_stream_add_slave(rt711->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } @@ -1028,8 +1028,8 @@ static int rt711_pcm_hw_params(struct snd_pcm_substream *substream, /* bit 3:0 Number of Channel */ val |= (params_channels(params) - 1); } else { - dev_err(component->dev, "Unsupported channels %d\n", - params_channels(params)); + dev_err(component->dev, "%s: Unsupported channels %d\n", + __func__, params_channels(params)); return -EINVAL; } diff --git a/sound/soc/codecs/rt712-sdca-dmic.c b/sound/soc/codecs/rt712-sdca-dmic.c index 0926b26619bd..012b79e72cf6 100644 --- a/sound/soc/codecs/rt712-sdca-dmic.c +++ b/sound/soc/codecs/rt712-sdca-dmic.c @@ -139,8 +139,8 @@ static int rt712_sdca_dmic_index_write(struct rt712_sdca_dmic_priv *rt712, ret = regmap_write(regmap, addr, value); if (ret < 0) dev_err(&rt712->slave->dev, - "Failed to set private value: %06x <= %04x ret=%d\n", - addr, value, ret); + "%s: Failed to set private value: %06x <= %04x ret=%d\n", + __func__, addr, value, ret); return ret; } @@ -155,8 +155,8 @@ static int rt712_sdca_dmic_index_read(struct rt712_sdca_dmic_priv *rt712, ret = regmap_read(regmap, addr, value); if (ret < 0) dev_err(&rt712->slave->dev, - "Failed to get private value: %06x => %04x ret=%d\n", - addr, *value, ret); + "%s: Failed to get private value: %06x => %04x ret=%d\n", + __func__, addr, *value, ret); return ret; } @@ -317,7 +317,8 @@ static int rt712_sdca_dmic_set_gain_put(struct snd_kcontrol *kcontrol, for (i = 0; i < p->count; i++) { err = regmap_write(rt712->mbq_regmap, p->reg_base + i, gain_val[i]); if (err < 0) - dev_err(&rt712->slave->dev, "0x%08x can't be set\n", p->reg_base + i); + dev_err(&rt712->slave->dev, "%s: 0x%08x can't be set\n", + __func__, p->reg_base + i); } return changed; @@ -667,13 +668,13 @@ static int rt712_sdca_dmic_hw_params(struct snd_pcm_substream *substream, retval = sdw_stream_add_slave(rt712->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } if (params_channels(params) > 4) { - dev_err(component->dev, "Unsupported channels %d\n", - params_channels(params)); + dev_err(component->dev, "%s: Unsupported channels %d\n", + __func__, params_channels(params)); return -EINVAL; } @@ -698,8 +699,8 @@ static int rt712_sdca_dmic_hw_params(struct snd_pcm_substream *substream, sampling_rate = RT712_SDCA_RATE_192000HZ; break; default: - dev_err(component->dev, "Rate %d is not supported\n", - params_rate(params)); + dev_err(component->dev, "%s: Rate %d is not supported\n", + __func__, params_rate(params)); return -EINVAL; } @@ -923,7 +924,8 @@ static int __maybe_unused rt712_sdca_dmic_dev_resume(struct device *dev) time = wait_for_completion_timeout(&slave->initialization_complete, msecs_to_jiffies(RT712_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Initialization not complete, timed out\n"); + dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", + __func__); sdw_show_ping_status(slave->bus, true); return -ETIMEDOUT; diff --git a/sound/soc/codecs/rt712-sdca-sdw.c b/sound/soc/codecs/rt712-sdca-sdw.c index 01ac555cd79b..4e9ab3ef135b 100644 --- a/sound/soc/codecs/rt712-sdca-sdw.c +++ b/sound/soc/codecs/rt712-sdca-sdw.c @@ -438,20 +438,21 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt712->disable_irq_lock); if (rt712->disable_irq == true) { - mutex_lock(&rt712->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); rt712->disable_irq = false; - mutex_unlock(&rt712->disable_irq_lock); } + mutex_unlock(&rt712->disable_irq_lock); goto regmap_sync; } time = wait_for_completion_timeout(&slave->initialization_complete, msecs_to_jiffies(RT712_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Initialization not complete, timed out\n"); + dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__); sdw_show_ping_status(slave->bus, true); return -ETIMEDOUT; diff --git a/sound/soc/codecs/rt712-sdca.c b/sound/soc/codecs/rt712-sdca.c index 6954fbe7ec5f..b503de9fda80 100644 --- a/sound/soc/codecs/rt712-sdca.c +++ b/sound/soc/codecs/rt712-sdca.c @@ -34,8 +34,8 @@ static int rt712_sdca_index_write(struct rt712_sdca_priv *rt712, ret = regmap_write(regmap, addr, value); if (ret < 0) dev_err(&rt712->slave->dev, - "Failed to set private value: %06x <= %04x ret=%d\n", - addr, value, ret); + "%s: Failed to set private value: %06x <= %04x ret=%d\n", + __func__, addr, value, ret); return ret; } @@ -50,8 +50,8 @@ static int rt712_sdca_index_read(struct rt712_sdca_priv *rt712, ret = regmap_read(regmap, addr, value); if (ret < 0) dev_err(&rt712->slave->dev, - "Failed to get private value: %06x => %04x ret=%d\n", - addr, *value, ret); + "%s: Failed to get private value: %06x => %04x ret=%d\n", + __func__, addr, *value, ret); return ret; } @@ -1060,13 +1060,13 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream, retval = sdw_stream_add_slave(rt712->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } if (params_channels(params) > 16) { - dev_err(component->dev, "Unsupported channels %d\n", - params_channels(params)); + dev_err(component->dev, "%s: Unsupported channels %d\n", + __func__, params_channels(params)); return -EINVAL; } @@ -1085,8 +1085,8 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream, sampling_rate = RT712_SDCA_RATE_192000HZ; break; default: - dev_err(component->dev, "Rate %d is not supported\n", - params_rate(params)); + dev_err(component->dev, "%s: Rate %d is not supported\n", + __func__, params_rate(params)); return -EINVAL; } @@ -1106,7 +1106,7 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream, sampling_rate); break; default: - dev_err(component->dev, "Wrong DAI id\n"); + dev_err(component->dev, "%s: Wrong DAI id\n", __func__); return -EINVAL; } diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c index ab54a67a27eb..ee450126106f 100644 --- a/sound/soc/codecs/rt715-sdca-sdw.c +++ b/sound/soc/codecs/rt715-sdca-sdw.c @@ -237,7 +237,7 @@ static int __maybe_unused rt715_dev_resume(struct device *dev) time = wait_for_completion_timeout(&slave->enumeration_complete, msecs_to_jiffies(RT715_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Enumeration not complete, timed out\n"); + dev_err(&slave->dev, "%s: Enumeration not complete, timed out\n", __func__); sdw_show_ping_status(slave->bus, true); return -ETIMEDOUT; diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c index 4533eedd7e18..3fb7b9adb61d 100644 --- a/sound/soc/codecs/rt715-sdca.c +++ b/sound/soc/codecs/rt715-sdca.c @@ -41,8 +41,8 @@ static int rt715_sdca_index_write(struct rt715_sdca_priv *rt715, ret = regmap_write(regmap, addr, value); if (ret < 0) dev_err(&rt715->slave->dev, - "Failed to set private value: %08x <= %04x %d\n", - addr, value, ret); + "%s: Failed to set private value: %08x <= %04x %d\n", + __func__, addr, value, ret); return ret; } @@ -59,8 +59,8 @@ static int rt715_sdca_index_read(struct rt715_sdca_priv *rt715, ret = regmap_read(regmap, addr, value); if (ret < 0) dev_err(&rt715->slave->dev, - "Failed to get private value: %06x => %04x ret=%d\n", - addr, *value, ret); + "%s: Failed to get private value: %06x => %04x ret=%d\n", + __func__, addr, *value, ret); return ret; } @@ -152,8 +152,8 @@ static int rt715_sdca_set_amp_gain_put(struct snd_kcontrol *kcontrol, mc->shift); ret = regmap_write(rt715->mbq_regmap, mc->reg + i, gain_val); if (ret != 0) { - dev_err(component->dev, "Failed to write 0x%x=0x%x\n", - mc->reg + i, gain_val); + dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n", + __func__, mc->reg + i, gain_val); return ret; } } @@ -188,8 +188,8 @@ static int rt715_sdca_set_amp_gain_4ch_put(struct snd_kcontrol *kcontrol, ret = regmap_write(rt715->mbq_regmap, reg_base + i, gain_val); if (ret != 0) { - dev_err(component->dev, "Failed to write 0x%x=0x%x\n", - reg_base + i, gain_val); + dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n", + __func__, reg_base + i, gain_val); return ret; } } @@ -224,8 +224,8 @@ static int rt715_sdca_set_amp_gain_8ch_put(struct snd_kcontrol *kcontrol, reg = i < 7 ? reg_base + i : (reg_base - 1) | BIT(15); ret = regmap_write(rt715->mbq_regmap, reg, gain_val); if (ret != 0) { - dev_err(component->dev, "Failed to write 0x%x=0x%x\n", - reg, gain_val); + dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n", + __func__, reg, gain_val); return ret; } } @@ -246,8 +246,8 @@ static int rt715_sdca_set_amp_gain_get(struct snd_kcontrol *kcontrol, for (i = 0; i < 2; i++) { ret = regmap_read(rt715->mbq_regmap, mc->reg + i, &val); if (ret < 0) { - dev_err(component->dev, "Failed to read 0x%x, ret=%d\n", - mc->reg + i, ret); + dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n", + __func__, mc->reg + i, ret); return ret; } ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, mc->shift); @@ -271,8 +271,8 @@ static int rt715_sdca_set_amp_gain_4ch_get(struct snd_kcontrol *kcontrol, for (i = 0; i < 4; i++) { ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val); if (ret < 0) { - dev_err(component->dev, "Failed to read 0x%x, ret=%d\n", - reg_base + i, ret); + dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n", + __func__, reg_base + i, ret); return ret; } ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, gain_sft); @@ -297,8 +297,8 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol, for (i = 0; i < 8; i += 2) { ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val_l); if (ret < 0) { - dev_err(component->dev, "Failed to read 0x%x, ret=%d\n", - reg_base + i, ret); + dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n", + __func__, reg_base + i, ret); return ret; } ucontrol->value.integer.value[i] = (val_l >> gain_sft) / 10; @@ -306,8 +306,8 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol, reg = (i == 6) ? (reg_base - 1) | BIT(15) : reg_base + 1 + i; ret = regmap_read(rt715->mbq_regmap, reg, &val_r); if (ret < 0) { - dev_err(component->dev, "Failed to read 0x%x, ret=%d\n", - reg, ret); + dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n", + __func__, reg, ret); return ret; } ucontrol->value.integer.value[i + 1] = (val_r >> gain_sft) / 10; @@ -834,15 +834,15 @@ static int rt715_sdca_pcm_hw_params(struct snd_pcm_substream *substream, 0xaf00); break; default: - dev_err(component->dev, "Invalid DAI id %d\n", dai->id); + dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id); return -EINVAL; } retval = sdw_stream_add_slave(rt715->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(component->dev, "Unable to configure port, retval:%d\n", - retval); + dev_err(component->dev, "%s: Unable to configure port, retval:%d\n", + __func__, retval); return retval; } @@ -893,8 +893,8 @@ static int rt715_sdca_pcm_hw_params(struct snd_pcm_substream *substream, val = 0xf; break; default: - dev_err(component->dev, "Unsupported sample rate %d\n", - params_rate(params)); + dev_err(component->dev, "%s: Unsupported sample rate %d\n", + __func__, params_rate(params)); return -EINVAL; } diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index 21f37babd148..7e13868ff99f 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -482,7 +482,7 @@ static int rt715_bus_config(struct sdw_slave *slave, ret = rt715_clock_config(&slave->dev); if (ret < 0) - dev_err(&slave->dev, "Invalid clk config"); + dev_err(&slave->dev, "%s: Invalid clk config", __func__); return 0; } @@ -554,7 +554,7 @@ static int __maybe_unused rt715_dev_resume(struct device *dev) time = wait_for_completion_timeout(&slave->initialization_complete, msecs_to_jiffies(RT715_PROBE_TIMEOUT)); if (!time) { - dev_err(&slave->dev, "Initialization not complete, timed out\n"); + dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__); sdw_show_ping_status(slave->bus, true); return -ETIMEDOUT; diff --git a/sound/soc/codecs/rt715.c b/sound/soc/codecs/rt715.c index 9f732a5abd53..299c9b12377c 100644 --- a/sound/soc/codecs/rt715.c +++ b/sound/soc/codecs/rt715.c @@ -40,8 +40,8 @@ static int rt715_index_write(struct regmap *regmap, unsigned int reg, ret = regmap_write(regmap, addr, value); if (ret < 0) { - pr_err("Failed to set private value: %08x <= %04x %d\n", - addr, value, ret); + pr_err("%s: Failed to set private value: %08x <= %04x %d\n", + __func__, addr, value, ret); } return ret; @@ -55,8 +55,8 @@ static int rt715_index_write_nid(struct regmap *regmap, ret = regmap_write(regmap, addr, value); if (ret < 0) - pr_err("Failed to set private value: %06x <= %04x ret=%d\n", - addr, value, ret); + pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n", + __func__, addr, value, ret); return ret; } @@ -70,8 +70,8 @@ static int rt715_index_read_nid(struct regmap *regmap, *value = 0; ret = regmap_read(regmap, addr, value); if (ret < 0) - pr_err("Failed to get private value: %06x => %04x ret=%d\n", - addr, *value, ret); + pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n", + __func__, addr, *value, ret); return ret; } @@ -862,14 +862,14 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream, rt715_index_write(rt715->regmap, RT715_SDW_INPUT_SEL, 0xa000); break; default: - dev_err(component->dev, "Invalid DAI id %d\n", dai->id); + dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id); return -EINVAL; } retval = sdw_stream_add_slave(rt715->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } @@ -883,8 +883,8 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream, val |= 0x0 << 8; break; default: - dev_err(component->dev, "Unsupported sample rate %d\n", - params_rate(params)); + dev_err(component->dev, "%s: Unsupported sample rate %d\n", + __func__, params_rate(params)); return -EINVAL; } @@ -892,8 +892,8 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream, /* bit 3:0 Number of Channel */ val |= (params_channels(params) - 1); } else { - dev_err(component->dev, "Unsupported channels %d\n", - params_channels(params)); + dev_err(component->dev, "%s: Unsupported channels %d\n", + __func__, params_channels(params)); return -EINVAL; } diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c index eb76f4c675b6..65d584c1886e 100644 --- a/sound/soc/codecs/rt722-sdca-sdw.c +++ b/sound/soc/codecs/rt722-sdca-sdw.c @@ -467,13 +467,13 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt722->disable_irq_lock); if (rt722->disable_irq == true) { - mutex_lock(&rt722->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); rt722->disable_irq = false; - mutex_unlock(&rt722->disable_irq_lock); } + mutex_unlock(&rt722->disable_irq_lock); goto regmap_sync; } diff --git a/sound/soc/codecs/rt722-sdca.c b/sound/soc/codecs/rt722-sdca.c index 0e1c65a20392..e0ea3a23f7cc 100644 --- a/sound/soc/codecs/rt722-sdca.c +++ b/sound/soc/codecs/rt722-sdca.c @@ -35,8 +35,8 @@ int rt722_sdca_index_write(struct rt722_sdca_priv *rt722, ret = regmap_write(regmap, addr, value); if (ret < 0) dev_err(&rt722->slave->dev, - "Failed to set private value: %06x <= %04x ret=%d\n", - addr, value, ret); + "%s: Failed to set private value: %06x <= %04x ret=%d\n", + __func__, addr, value, ret); return ret; } @@ -51,8 +51,8 @@ int rt722_sdca_index_read(struct rt722_sdca_priv *rt722, ret = regmap_read(regmap, addr, value); if (ret < 0) dev_err(&rt722->slave->dev, - "Failed to get private value: %06x => %04x ret=%d\n", - addr, *value, ret); + "%s: Failed to get private value: %06x => %04x ret=%d\n", + __func__, addr, *value, ret); return ret; } @@ -663,7 +663,8 @@ static int rt722_sdca_dmic_set_gain_put(struct snd_kcontrol *kcontrol, for (i = 0; i < p->count; i++) { err = regmap_write(rt722->mbq_regmap, p->reg_base + i, gain_val[i]); if (err < 0) - dev_err(&rt722->slave->dev, "%#08x can't be set\n", p->reg_base + i); + dev_err(&rt722->slave->dev, "%s: %#08x can't be set\n", + __func__, p->reg_base + i); } return changed; @@ -1211,13 +1212,13 @@ static int rt722_sdca_pcm_hw_params(struct snd_pcm_substream *substream, retval = sdw_stream_add_slave(rt722->slave, &stream_config, &port_config, 1, sdw_stream); if (retval) { - dev_err(dai->dev, "Unable to configure port\n"); + dev_err(dai->dev, "%s: Unable to configure port\n", __func__); return retval; } if (params_channels(params) > 16) { - dev_err(component->dev, "Unsupported channels %d\n", - params_channels(params)); + dev_err(component->dev, "%s: Unsupported channels %d\n", + __func__, params_channels(params)); return -EINVAL; } @@ -1236,8 +1237,8 @@ static int rt722_sdca_pcm_hw_params(struct snd_pcm_substream *substream, sampling_rate = RT722_SDCA_RATE_192000HZ; break; default: - dev_err(component->dev, "Rate %d is not supported\n", - params_rate(params)); + dev_err(component->dev, "%s: Rate %d is not supported\n", + __func__, params_rate(params)); return -EINVAL; } diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index e451c009f2d9..7d5c096e06cd 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -683,11 +683,12 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl) int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len) { - struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg); + struct cs_dsp_coeff_ctl *cs_ctl; struct wm_coeff_ctl *ctl; int ret; mutex_lock(&dsp->cs_dsp.pwr_lock); + cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg); ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len); mutex_unlock(&dsp->cs_dsp.pwr_lock); diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c index c018f84fe025..fc072dc58968 100644 --- a/sound/soc/intel/avs/boards/da7219.c +++ b/sound/soc/intel/avs/boards/da7219.c @@ -296,5 +296,6 @@ static struct platform_driver avs_da7219_driver = { module_platform_driver(avs_da7219_driver); +MODULE_DESCRIPTION("Intel da7219 machine driver"); MODULE_AUTHOR("Cezary Rojewski "); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/dmic.c b/sound/soc/intel/avs/boards/dmic.c index ba2bc7f689eb..d9e5e85f5233 100644 --- a/sound/soc/intel/avs/boards/dmic.c +++ b/sound/soc/intel/avs/boards/dmic.c @@ -96,4 +96,5 @@ static struct platform_driver avs_dmic_driver = { module_platform_driver(avs_dmic_driver); +MODULE_DESCRIPTION("Intel DMIC machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/es8336.c b/sound/soc/intel/avs/boards/es8336.c index 1090082e7d5b..5c90a6007577 100644 --- a/sound/soc/intel/avs/boards/es8336.c +++ b/sound/soc/intel/avs/boards/es8336.c @@ -326,4 +326,5 @@ static struct platform_driver avs_es8336_driver = { module_platform_driver(avs_es8336_driver); +MODULE_DESCRIPTION("Intel es8336 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/i2s_test.c b/sound/soc/intel/avs/boards/i2s_test.c index 28f254eb0d03..027373d6a16d 100644 --- a/sound/soc/intel/avs/boards/i2s_test.c +++ b/sound/soc/intel/avs/boards/i2s_test.c @@ -204,4 +204,5 @@ static struct platform_driver avs_i2s_test_driver = { module_platform_driver(avs_i2s_test_driver); +MODULE_DESCRIPTION("Intel i2s test machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/max98357a.c b/sound/soc/intel/avs/boards/max98357a.c index a83b95f25129..1ff85e4d8e16 100644 --- a/sound/soc/intel/avs/boards/max98357a.c +++ b/sound/soc/intel/avs/boards/max98357a.c @@ -154,4 +154,5 @@ static struct platform_driver avs_max98357a_driver = { module_platform_driver(avs_max98357a_driver) +MODULE_DESCRIPTION("Intel max98357a machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/max98373.c b/sound/soc/intel/avs/boards/max98373.c index 3b980a025e6f..8d31586b73ea 100644 --- a/sound/soc/intel/avs/boards/max98373.c +++ b/sound/soc/intel/avs/boards/max98373.c @@ -211,4 +211,5 @@ static struct platform_driver avs_max98373_driver = { module_platform_driver(avs_max98373_driver) +MODULE_DESCRIPTION("Intel max98373 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/max98927.c b/sound/soc/intel/avs/boards/max98927.c index 86dd2b228df3..572ec58073d0 100644 --- a/sound/soc/intel/avs/boards/max98927.c +++ b/sound/soc/intel/avs/boards/max98927.c @@ -208,4 +208,5 @@ static struct platform_driver avs_max98927_driver = { module_platform_driver(avs_max98927_driver) +MODULE_DESCRIPTION("Intel max98927 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/nau8825.c b/sound/soc/intel/avs/boards/nau8825.c index 1c1e2083f474..55db75efae41 100644 --- a/sound/soc/intel/avs/boards/nau8825.c +++ b/sound/soc/intel/avs/boards/nau8825.c @@ -313,4 +313,5 @@ static struct platform_driver avs_nau8825_driver = { module_platform_driver(avs_nau8825_driver) +MODULE_DESCRIPTION("Intel nau8825 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/probe.c b/sound/soc/intel/avs/boards/probe.c index a9469b5ecb40..8be6887bbc6e 100644 --- a/sound/soc/intel/avs/boards/probe.c +++ b/sound/soc/intel/avs/boards/probe.c @@ -69,4 +69,5 @@ static struct platform_driver avs_probe_mb_driver = { module_platform_driver(avs_probe_mb_driver); +MODULE_DESCRIPTION("Intel probe machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/rt274.c b/sound/soc/intel/avs/boards/rt274.c index bfcb8845fd15..1cf524216087 100644 --- a/sound/soc/intel/avs/boards/rt274.c +++ b/sound/soc/intel/avs/boards/rt274.c @@ -276,4 +276,5 @@ static struct platform_driver avs_rt274_driver = { module_platform_driver(avs_rt274_driver); +MODULE_DESCRIPTION("Intel rt274 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/rt286.c b/sound/soc/intel/avs/boards/rt286.c index 28d7d86b1cc9..4740bba10570 100644 --- a/sound/soc/intel/avs/boards/rt286.c +++ b/sound/soc/intel/avs/boards/rt286.c @@ -247,4 +247,5 @@ static struct platform_driver avs_rt286_driver = { module_platform_driver(avs_rt286_driver); +MODULE_DESCRIPTION("Intel rt286 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/rt298.c b/sound/soc/intel/avs/boards/rt298.c index 80f490b9e118..6e409e29f697 100644 --- a/sound/soc/intel/avs/boards/rt298.c +++ b/sound/soc/intel/avs/boards/rt298.c @@ -266,4 +266,5 @@ static struct platform_driver avs_rt298_driver = { module_platform_driver(avs_rt298_driver); +MODULE_DESCRIPTION("Intel rt298 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/rt5514.c b/sound/soc/intel/avs/boards/rt5514.c index 60105f453ae2..097ae5f73241 100644 --- a/sound/soc/intel/avs/boards/rt5514.c +++ b/sound/soc/intel/avs/boards/rt5514.c @@ -192,4 +192,5 @@ static struct platform_driver avs_rt5514_driver = { module_platform_driver(avs_rt5514_driver); +MODULE_DESCRIPTION("Intel rt5514 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/rt5663.c b/sound/soc/intel/avs/boards/rt5663.c index b4762c2a7bf2..1880c315cc4d 100644 --- a/sound/soc/intel/avs/boards/rt5663.c +++ b/sound/soc/intel/avs/boards/rt5663.c @@ -265,4 +265,5 @@ static struct platform_driver avs_rt5663_driver = { module_platform_driver(avs_rt5663_driver); +MODULE_DESCRIPTION("Intel rt5663 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/rt5682.c b/sound/soc/intel/avs/boards/rt5682.c index 243f979fda98..594a971ded9e 100644 --- a/sound/soc/intel/avs/boards/rt5682.c +++ b/sound/soc/intel/avs/boards/rt5682.c @@ -341,5 +341,6 @@ static struct platform_driver avs_rt5682_driver = { module_platform_driver(avs_rt5682_driver) +MODULE_DESCRIPTION("Intel rt5682 machine driver"); MODULE_AUTHOR("Cezary Rojewski "); MODULE_LICENSE("GPL"); diff --git a/sound/soc/intel/avs/boards/ssm4567.c b/sound/soc/intel/avs/boards/ssm4567.c index 4a0e136835ff..d6f7f046c24e 100644 --- a/sound/soc/intel/avs/boards/ssm4567.c +++ b/sound/soc/intel/avs/boards/ssm4567.c @@ -200,4 +200,5 @@ static struct platform_driver avs_ssm4567_driver = { module_platform_driver(avs_ssm4567_driver) +MODULE_DESCRIPTION("Intel ssm4567 machine driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 2d25748ca706..b27e89ff6a16 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol, int max = mc->max; int min = mc->min; int sign_bit = mc->sign_bit; - unsigned int mask = (1 << fls(max)) - 1; + unsigned int mask = (1ULL << fls(max)) - 1; unsigned int invert = mc->invert; int val; int ret; diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index be7dc1e02284..c12c7f820529 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -704,6 +704,10 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev) goto unregister_dev; } + ret = acp_init(sdev); + if (ret < 0) + goto free_smn_dev; + sdev->ipc_irq = pci->irq; ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread, IRQF_SHARED, "AudioDSP", sdev); @@ -713,10 +717,6 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev) goto free_smn_dev; } - ret = acp_init(sdev); - if (ret < 0) - goto free_ipc_irq; - /* scan SoundWire capabilities exposed by DSDT */ ret = acp_sof_scan_sdw_devices(sdev, chip->sdw_acpi_dev_addr); if (ret < 0) { diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index 9b00ede2a486..cc84d4c81be9 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -339,8 +339,7 @@ static int sof_init_environment(struct snd_sof_dev *sdev) ret = snd_sof_probe(sdev); if (ret < 0) { dev_err(sdev->dev, "failed to probe DSP %d\n", ret); - sof_ops_free(sdev); - return ret; + goto err_sof_probe; } /* check machine info */ @@ -358,15 +357,18 @@ static int sof_init_environment(struct snd_sof_dev *sdev) ret = validate_sof_ops(sdev); if (ret < 0) { snd_sof_remove(sdev); + snd_sof_remove_late(sdev); return ret; } } + return 0; + err_machine_check: - if (ret) { - snd_sof_remove(sdev); - sof_ops_free(sdev); - } + snd_sof_remove(sdev); +err_sof_probe: + snd_sof_remove_late(sdev); + sof_ops_free(sdev); return ret; } diff --git a/sound/soc/sof/intel/hda-common-ops.c b/sound/soc/sof/intel/hda-common-ops.c index 2b385cddc385..d71bb66b9991 100644 --- a/sound/soc/sof/intel/hda-common-ops.c +++ b/sound/soc/sof/intel/hda-common-ops.c @@ -57,6 +57,9 @@ struct snd_sof_dsp_ops sof_hda_common_ops = { .pcm_pointer = hda_dsp_pcm_pointer, .pcm_ack = hda_dsp_pcm_ack, + .get_dai_frame_counter = hda_dsp_get_stream_llp, + .get_host_byte_counter = hda_dsp_get_stream_ldp, + /* firmware loading */ .load_firmware = snd_sof_load_firmware_raw, diff --git a/sound/soc/sof/intel/hda-dai-ops.c b/sound/soc/sof/intel/hda-dai-ops.c index c50ca9e72d37..b073720b4cf4 100644 --- a/sound/soc/sof/intel/hda-dai-ops.c +++ b/sound/soc/sof/intel/hda-dai-ops.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -362,6 +363,16 @@ static int hda_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *cpu_dai, case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: snd_hdac_ext_stream_clear(hext_stream); + + /* + * Save the LLP registers in case the stream is + * restarting due PAUSE_RELEASE, or START without a pcm + * close/open since in this case the LLP register is not reset + * to 0 and the delay calculation will return with invalid + * results. + */ + hext_stream->pplcllpl = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL); + hext_stream->pplcllpu = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU); break; default: dev_err(sdev->dev, "unknown trigger command %d\n", cmd); diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 31ffa1a8f2ac..ef5c915db8ff 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -681,17 +681,27 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend) struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; const struct sof_intel_dsp_desc *chip = hda->desc; struct hdac_bus *bus = sof_to_bus(sdev); + bool imr_lost = false; int ret, j; /* - * The memory used for IMR boot loses its content in deeper than S3 state - * We must not try IMR boot on next power up (as it will fail). - * + * The memory used for IMR boot loses its content in deeper than S3 + * state on CAVS platforms. + * On ACE platforms due to the system architecture the IMR content is + * lost at S3 state already, they are tailored for s2idle use. + * We must not try IMR boot on next power up in these cases as it will + * fail. + */ + if (sdev->system_suspend_target > SOF_SUSPEND_S3 || + (chip->hw_ip_version >= SOF_INTEL_ACE_1_0 && + sdev->system_suspend_target == SOF_SUSPEND_S3)) + imr_lost = true; + + /* * In case of firmware crash or boot failure set the skip_imr_boot to true * as well in order to try to re-load the firmware to do a 'cold' boot. */ - if (sdev->system_suspend_target > SOF_SUSPEND_S3 || - sdev->fw_state == SOF_FW_CRASHED || + if (imr_lost || sdev->fw_state == SOF_FW_CRASHED || sdev->fw_state == SOF_FW_BOOT_FAILED) hda->skip_imr_boot = true; diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c index 18f07364d219..d7b446f3f973 100644 --- a/sound/soc/sof/intel/hda-pcm.c +++ b/sound/soc/sof/intel/hda-pcm.c @@ -259,8 +259,37 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev, snd_pcm_hw_constraint_mask64(substream->runtime, SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S32); + /* + * The dsp_max_burst_size_in_ms is the length of the maximum burst size + * of the host DMA in the ALSA buffer. + * + * On playback start the DMA will transfer dsp_max_burst_size_in_ms + * amount of data in one initial burst to fill up the host DMA buffer. + * Consequent DMA burst sizes are shorter and their length can vary. + * To make sure that userspace allocate large enough ALSA buffer we need + * to place a constraint on the buffer time. + * + * On capture the DMA will transfer 1ms chunks. + * + * Exact dsp_max_burst_size_in_ms constraint is racy, so set the + * constraint to a minimum of 2x dsp_max_burst_size_in_ms. + */ + if (spcm->stream[direction].dsp_max_burst_size_in_ms) + snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_BUFFER_TIME, + spcm->stream[direction].dsp_max_burst_size_in_ms * USEC_PER_MSEC * 2, + UINT_MAX); + /* binding pcm substream to hda stream */ substream->runtime->private_data = &dsp_stream->hstream; + + /* + * Reset the llp cache values (they are used for LLP compensation in + * case the counter is not reset) + */ + dsp_stream->pplcllpl = 0; + dsp_stream->pplcllpu = 0; + return 0; } diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c index b387b1a69d7e..0c189d3b19c1 100644 --- a/sound/soc/sof/intel/hda-stream.c +++ b/sound/soc/sof/intel/hda-stream.c @@ -1063,3 +1063,73 @@ snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream, return pos; } + +#define merge_u64(u32_u, u32_l) (((u64)(u32_u) << 32) | (u32_l)) + +/** + * hda_dsp_get_stream_llp - Retrieve the LLP (Linear Link Position) of the stream + * @sdev: SOF device + * @component: ASoC component + * @substream: PCM substream + * + * Returns the raw Linear Link Position value + */ +u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev, + struct snd_soc_component *component, + struct snd_pcm_substream *substream) +{ + struct hdac_stream *hstream = substream->runtime->private_data; + struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream); + u32 llp_l, llp_u; + + /* + * The pplc_addr have been calculated during probe in + * hda_dsp_stream_init(): + * pplc_addr = sdev->bar[HDA_DSP_PP_BAR] + + * SOF_HDA_PPLC_BASE + + * SOF_HDA_PPLC_MULTI * total_stream + + * SOF_HDA_PPLC_INTERVAL * stream_index + * + * Use this pre-calculated address to avoid repeated re-calculation. + */ + llp_l = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL); + llp_u = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU); + + /* Compensate the LLP counter with the saved offset */ + if (hext_stream->pplcllpl || hext_stream->pplcllpu) + return merge_u64(llp_u, llp_l) - + merge_u64(hext_stream->pplcllpu, hext_stream->pplcllpl); + + return merge_u64(llp_u, llp_l); +} + +/** + * hda_dsp_get_stream_ldp - Retrieve the LDP (Linear DMA Position) of the stream + * @sdev: SOF device + * @component: ASoC component + * @substream: PCM substream + * + * Returns the raw Linear Link Position value + */ +u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev, + struct snd_soc_component *component, + struct snd_pcm_substream *substream) +{ + struct hdac_stream *hstream = substream->runtime->private_data; + struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream); + u32 ldp_l, ldp_u; + + /* + * The pphc_addr have been calculated during probe in + * hda_dsp_stream_init(): + * pphc_addr = sdev->bar[HDA_DSP_PP_BAR] + + * SOF_HDA_PPHC_BASE + + * SOF_HDA_PPHC_INTERVAL * stream_index + * + * Use this pre-calculated address to avoid repeated re-calculation. + */ + ldp_l = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPL); + ldp_u = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPU); + + return ((u64)ldp_u << 32) | ldp_l; +} diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index b36eb7c78913..81a1d4606d3c 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -662,6 +662,12 @@ bool hda_dsp_check_stream_irq(struct snd_sof_dev *sdev); snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream, int direction, bool can_sleep); +u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev, + struct snd_soc_component *component, + struct snd_pcm_substream *substream); +u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev, + struct snd_soc_component *component, + struct snd_pcm_substream *substream); struct hdac_ext_stream * hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction, u32 flags); diff --git a/sound/soc/sof/intel/lnl.c b/sound/soc/sof/intel/lnl.c index 7ae017a00184..aeb4350cce6b 100644 --- a/sound/soc/sof/intel/lnl.c +++ b/sound/soc/sof/intel/lnl.c @@ -29,15 +29,17 @@ static const struct snd_sof_debugfs_map lnl_dsp_debugfs[] = { }; /* this helps allows the DSP to setup DMIC/SSP */ -static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus) +static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus, bool enable) { int ret; - ret = hdac_bus_eml_enable_offload(bus, true, AZX_REG_ML_LEPTR_ID_INTEL_SSP, true); + ret = hdac_bus_eml_enable_offload(bus, true, + AZX_REG_ML_LEPTR_ID_INTEL_SSP, enable); if (ret < 0) return ret; - ret = hdac_bus_eml_enable_offload(bus, true, AZX_REG_ML_LEPTR_ID_INTEL_DMIC, true); + ret = hdac_bus_eml_enable_offload(bus, true, + AZX_REG_ML_LEPTR_ID_INTEL_DMIC, enable); if (ret < 0) return ret; @@ -52,7 +54,19 @@ static int lnl_hda_dsp_probe(struct snd_sof_dev *sdev) if (ret < 0) return ret; - return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev)); + return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true); +} + +static void lnl_hda_dsp_remove(struct snd_sof_dev *sdev) +{ + int ret; + + ret = hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), false); + if (ret < 0) + dev_warn(sdev->dev, + "Failed to disable offload for DMIC/SSP: %d\n", ret); + + hda_dsp_remove(sdev); } static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev) @@ -63,7 +77,7 @@ static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev) if (ret < 0) return ret; - return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev)); + return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true); } static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev) @@ -74,7 +88,7 @@ static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev) if (ret < 0) return ret; - return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev)); + return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true); } static int lnl_dsp_post_fw_run(struct snd_sof_dev *sdev) @@ -97,9 +111,11 @@ int sof_lnl_ops_init(struct snd_sof_dev *sdev) /* common defaults */ memcpy(&sof_lnl_ops, &sof_hda_common_ops, sizeof(struct snd_sof_dsp_ops)); - /* probe */ - if (!sdev->dspless_mode_selected) + /* probe/remove */ + if (!sdev->dspless_mode_selected) { sof_lnl_ops.probe = lnl_hda_dsp_probe; + sof_lnl_ops.remove = lnl_hda_dsp_remove; + } /* shutdown */ sof_lnl_ops.shutdown = hda_dsp_shutdown; @@ -134,8 +150,6 @@ int sof_lnl_ops_init(struct snd_sof_dev *sdev) sof_lnl_ops.runtime_resume = lnl_hda_dsp_runtime_resume; } - sof_lnl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position; - /* dsp core get/put */ sof_lnl_ops.core_get = mtl_dsp_core_get; sof_lnl_ops.core_put = mtl_dsp_core_put; diff --git a/sound/soc/sof/intel/mtl.c b/sound/soc/sof/intel/mtl.c index df05dc77b8d5..060c34988e90 100644 --- a/sound/soc/sof/intel/mtl.c +++ b/sound/soc/sof/intel/mtl.c @@ -626,18 +626,6 @@ static int mtl_dsp_disable_interrupts(struct snd_sof_dev *sdev) return mtl_enable_interrupts(sdev, false); } -u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev, - struct snd_soc_component *component, - struct snd_pcm_substream *substream) -{ - struct hdac_stream *hstream = substream->runtime->private_data; - u32 llp_l, llp_u; - - llp_l = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPL(hstream->index)); - llp_u = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPU(hstream->index)); - return ((u64)llp_u << 32) | llp_l; -} - int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core) { const struct sof_ipc_pm_ops *pm_ops = sdev->ipc->ops->pm; @@ -707,8 +695,6 @@ int sof_mtl_ops_init(struct snd_sof_dev *sdev) sof_mtl_ops.core_get = mtl_dsp_core_get; sof_mtl_ops.core_put = mtl_dsp_core_put; - sof_mtl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position; - sdev->private = kzalloc(sizeof(struct sof_ipc4_fw_data), GFP_KERNEL); if (!sdev->private) return -ENOMEM; diff --git a/sound/soc/sof/intel/mtl.h b/sound/soc/sof/intel/mtl.h index cc5a1f46fd09..ea8c1b83f712 100644 --- a/sound/soc/sof/intel/mtl.h +++ b/sound/soc/sof/intel/mtl.h @@ -6,12 +6,6 @@ * Copyright(c) 2020-2022 Intel Corporation. All rights reserved. */ -/* HDA Registers */ -#define MTL_PPLCLLPL_BASE 0x948 -#define MTL_PPLCLLPU_STRIDE 0x10 -#define MTL_PPLCLLPL(x) (MTL_PPLCLLPL_BASE + (x) * MTL_PPLCLLPU_STRIDE) -#define MTL_PPLCLLPU(x) (MTL_PPLCLLPL_BASE + 0x4 + (x) * MTL_PPLCLLPU_STRIDE) - /* DSP Registers */ #define MTL_HFDSSCS 0x1000 #define MTL_HFDSSCS_SPA_MASK BIT(16) @@ -103,9 +97,5 @@ int mtl_dsp_ipc_get_window_offset(struct snd_sof_dev *sdev, u32 id); void mtl_ipc_dump(struct snd_sof_dev *sdev); -u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev, - struct snd_soc_component *component, - struct snd_pcm_substream *substream); - int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core); int mtl_dsp_core_put(struct snd_sof_dev *sdev, int core); diff --git a/sound/soc/sof/ipc4-mtrace.c b/sound/soc/sof/ipc4-mtrace.c index 9f1e33ee8826..0e04bea9432d 100644 --- a/sound/soc/sof/ipc4-mtrace.c +++ b/sound/soc/sof/ipc4-mtrace.c @@ -4,6 +4,7 @@ #include #include +#include #include #include "sof-priv.h" #include "ipc4-priv.h" @@ -412,7 +413,6 @@ static int ipc4_mtrace_enable(struct snd_sof_dev *sdev) const struct sof_ipc_ops *iops = sdev->ipc->ops; struct sof_ipc4_msg msg; u64 system_time; - ktime_t kt; int ret; if (priv->mtrace_state != SOF_MTRACE_DISABLED) @@ -424,9 +424,12 @@ static int ipc4_mtrace_enable(struct snd_sof_dev *sdev) msg.primary |= SOF_IPC4_MOD_INSTANCE(SOF_IPC4_MOD_INIT_BASEFW_INSTANCE_ID); msg.extension = SOF_IPC4_MOD_EXT_MSG_PARAM_ID(SOF_IPC4_FW_PARAM_SYSTEM_TIME); - /* The system time is in usec, UTC, epoch is 1601-01-01 00:00:00 */ - kt = ktime_add_us(ktime_get_real(), FW_EPOCH_DELTA * USEC_PER_SEC); - system_time = ktime_to_us(kt); + /* + * local_clock() is used to align with dmesg, so both kernel and firmware logs have + * the same base and a minor delta due to the IPC. system time is in us format but + * local_clock() returns the time in ns, so convert to ns. + */ + system_time = div64_u64(local_clock(), NSEC_PER_USEC); msg.data_size = sizeof(system_time); msg.data_ptr = &system_time; ret = iops->set_get_data(sdev, &msg, msg.data_size, true); diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index 0f332c8cdbe6..e915f9f87a6c 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -15,6 +15,28 @@ #include "ipc4-topology.h" #include "ipc4-fw-reg.h" +/** + * struct sof_ipc4_timestamp_info - IPC4 timestamp info + * @host_copier: the host copier of the pcm stream + * @dai_copier: the dai copier of the pcm stream + * @stream_start_offset: reported by fw in memory window (converted to frames) + * @stream_end_offset: reported by fw in memory window (converted to frames) + * @llp_offset: llp offset in memory window + * @boundary: wrap boundary should be used for the LLP frame counter + * @delay: Calculated and stored in pointer callback. The stored value is + * returned in the delay callback. + */ +struct sof_ipc4_timestamp_info { + struct sof_ipc4_copier *host_copier; + struct sof_ipc4_copier *dai_copier; + u64 stream_start_offset; + u64 stream_end_offset; + u32 llp_offset; + + u64 boundary; + snd_pcm_sframes_t delay; +}; + static int sof_ipc4_set_multi_pipeline_state(struct snd_sof_dev *sdev, u32 state, struct ipc4_pipeline_set_state_data *trigger_list) { @@ -423,8 +445,19 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component, } /* return if this is the final state */ - if (state == SOF_IPC4_PIPE_PAUSED) + if (state == SOF_IPC4_PIPE_PAUSED) { + struct sof_ipc4_timestamp_info *time_info; + + /* + * Invalidate the stream_start_offset to make sure that it is + * going to be updated if the stream resumes + */ + time_info = spcm->stream[substream->stream].private; + if (time_info) + time_info->stream_start_offset = SOF_IPC4_INVALID_STREAM_POSITION; + goto free; + } skip_pause_transition: /* else set the RUNNING/RESET state in the DSP */ ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list); @@ -464,14 +497,12 @@ static int sof_ipc4_pcm_trigger(struct snd_soc_component *component, /* determine the pipeline state */ switch (cmd) { - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - state = SOF_IPC4_PIPE_PAUSED; - break; case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_START: state = SOF_IPC4_PIPE_RUNNING; break; + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: state = SOF_IPC4_PIPE_PAUSED; @@ -703,6 +734,10 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm if (abi_version < SOF_IPC4_FW_REGS_ABI_VER) support_info = false; + /* For delay reporting the get_host_byte_counter callback is needed */ + if (!sof_ops(sdev) || !sof_ops(sdev)->get_host_byte_counter) + support_info = false; + for_each_pcm_streams(stream) { pipeline_list = &spcm->stream[stream].pipeline_list; @@ -835,7 +870,6 @@ static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev, struct sof_ipc4_copier *host_copier = time_info->host_copier; struct sof_ipc4_copier *dai_copier = time_info->dai_copier; struct sof_ipc4_pipeline_registers ppl_reg; - u64 stream_start_position; u32 dai_sample_size; u32 ch, node_index; u32 offset; @@ -852,38 +886,51 @@ static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev, if (ppl_reg.stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION) return -EINVAL; - stream_start_position = ppl_reg.stream_start_offset; ch = dai_copier->data.out_format.fmt_cfg; ch = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(ch); dai_sample_size = (dai_copier->data.out_format.bit_depth >> 3) * ch; - /* convert offset to sample count */ - do_div(stream_start_position, dai_sample_size); - time_info->stream_start_offset = stream_start_position; + + /* convert offsets to frame count */ + time_info->stream_start_offset = ppl_reg.stream_start_offset; + do_div(time_info->stream_start_offset, dai_sample_size); + time_info->stream_end_offset = ppl_reg.stream_end_offset; + do_div(time_info->stream_end_offset, dai_sample_size); + + /* + * Calculate the wrap boundary need to be used for delay calculation + * The host counter is in bytes, it will wrap earlier than the frames + * based link counter. + */ + time_info->boundary = div64_u64(~((u64)0), + frames_to_bytes(substream->runtime, 1)); + /* Initialize the delay value to 0 (no delay) */ + time_info->delay = 0; return 0; } -static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component, - struct snd_pcm_substream *substream) +static int sof_ipc4_pcm_pointer(struct snd_soc_component *component, + struct snd_pcm_substream *substream, + snd_pcm_uframes_t *pointer) { struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct sof_ipc4_timestamp_info *time_info; struct sof_ipc4_llp_reading_slot llp; - snd_pcm_uframes_t head_ptr, tail_ptr; + snd_pcm_uframes_t head_cnt, tail_cnt; struct snd_sof_pcm_stream *stream; + u64 dai_cnt, host_cnt, host_ptr; struct snd_sof_pcm *spcm; - u64 tmp_ptr; int ret; spcm = snd_sof_find_spcm_dai(component, rtd); if (!spcm) - return 0; + return -EOPNOTSUPP; stream = &spcm->stream[substream->stream]; time_info = stream->private; if (!time_info) - return 0; + return -EOPNOTSUPP; /* * stream_start_offset is updated to memory window by FW based on @@ -893,45 +940,116 @@ static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component, if (time_info->stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION) { ret = sof_ipc4_get_stream_start_offset(sdev, substream, stream, time_info); if (ret < 0) - return 0; + return -EOPNOTSUPP; } + /* For delay calculation we need the host counter */ + host_cnt = snd_sof_pcm_get_host_byte_counter(sdev, component, substream); + host_ptr = host_cnt; + + /* convert the host_cnt to frames */ + host_cnt = div64_u64(host_cnt, frames_to_bytes(substream->runtime, 1)); + /* - * HDaudio links don't support the LLP counter reported by firmware - * the link position is read directly from hardware registers. + * If the LLP counter is not reported by firmware in the SRAM window + * then read the dai (link) counter via host accessible means if + * available. */ if (!time_info->llp_offset) { - tmp_ptr = snd_sof_pcm_get_stream_position(sdev, component, substream); - if (!tmp_ptr) - return 0; + dai_cnt = snd_sof_pcm_get_dai_frame_counter(sdev, component, substream); + if (!dai_cnt) + return -EOPNOTSUPP; } else { sof_mailbox_read(sdev, time_info->llp_offset, &llp, sizeof(llp)); - tmp_ptr = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l; + dai_cnt = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l; } + dai_cnt += time_info->stream_end_offset; - /* In two cases dai dma position is not accurate + /* In two cases dai dma counter is not accurate * (1) dai pipeline is started before host pipeline - * (2) multiple streams mixed into one. Each stream has the same dai dma position + * (2) multiple streams mixed into one. Each stream has the same dai dma + * counter * - * Firmware calculates correct stream_start_offset for all cases including above two. - * Driver subtracts stream_start_offset from dai dma position to get accurate one + * Firmware calculates correct stream_start_offset for all cases + * including above two. + * Driver subtracts stream_start_offset from dai dma counter to get + * accurate one */ - tmp_ptr -= time_info->stream_start_offset; - /* Calculate the delay taking into account that both pointer can wrap */ - div64_u64_rem(tmp_ptr, substream->runtime->boundary, &tmp_ptr); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - head_ptr = substream->runtime->status->hw_ptr; - tail_ptr = tmp_ptr; + /* + * On stream start the dai counter might not yet have reached the + * stream_start_offset value which means that no frames have left the + * DSP yet from the audio stream (on playback, capture streams have + * offset of 0 as we start capturing right away). + * In this case we need to adjust the distance between the counters by + * increasing the host counter by (offset - dai_counter). + * Otherwise the dai_counter needs to be adjusted to reflect the number + * of valid frames passed on the DAI side. + * + * The delay is the difference between the counters on the two + * sides of the DSP. + */ + if (dai_cnt < time_info->stream_start_offset) { + host_cnt += time_info->stream_start_offset - dai_cnt; + dai_cnt = 0; } else { - head_ptr = tmp_ptr; - tail_ptr = substream->runtime->status->hw_ptr; + dai_cnt -= time_info->stream_start_offset; } - if (head_ptr < tail_ptr) - return substream->runtime->boundary - tail_ptr + head_ptr; + /* Wrap the dai counter at the boundary where the host counter wraps */ + div64_u64_rem(dai_cnt, time_info->boundary, &dai_cnt); + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + head_cnt = host_cnt; + tail_cnt = dai_cnt; + } else { + head_cnt = dai_cnt; + tail_cnt = host_cnt; + } + + if (head_cnt < tail_cnt) { + time_info->delay = time_info->boundary - tail_cnt + head_cnt; + goto out; + } + + time_info->delay = head_cnt - tail_cnt; + +out: + /* + * Convert the host byte counter to PCM pointer which wraps in buffer + * and it is in frames + */ + div64_u64_rem(host_ptr, snd_pcm_lib_buffer_bytes(substream), &host_ptr); + *pointer = bytes_to_frames(substream->runtime, host_ptr); + + return 0; +} + +static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component, + struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct sof_ipc4_timestamp_info *time_info; + struct snd_sof_pcm_stream *stream; + struct snd_sof_pcm *spcm; + + spcm = snd_sof_find_spcm_dai(component, rtd); + if (!spcm) + return 0; + + stream = &spcm->stream[substream->stream]; + time_info = stream->private; + /* + * Report the stored delay value calculated in the pointer callback. + * In the unlikely event that the calculation was skipped/aborted, the + * default 0 delay returned. + */ + if (time_info) + return time_info->delay; + + /* No delay information available, report 0 as delay */ + return 0; - return head_ptr - tail_ptr; } const struct sof_ipc_pcm_ops ipc4_pcm_ops = { @@ -941,6 +1059,7 @@ const struct sof_ipc_pcm_ops ipc4_pcm_ops = { .dai_link_fixup = sof_ipc4_pcm_dai_link_fixup, .pcm_setup = sof_ipc4_pcm_setup, .pcm_free = sof_ipc4_pcm_free, + .pointer = sof_ipc4_pcm_pointer, .delay = sof_ipc4_pcm_delay, .ipc_first_on_start = true, .platform_stop_during_hw_free = true, diff --git a/sound/soc/sof/ipc4-priv.h b/sound/soc/sof/ipc4-priv.h index f3b908b093f9..afed618a15f0 100644 --- a/sound/soc/sof/ipc4-priv.h +++ b/sound/soc/sof/ipc4-priv.h @@ -92,20 +92,6 @@ struct sof_ipc4_fw_data { struct mutex pipeline_state_mutex; /* protect pipeline triggers, ref counts and states */ }; -/** - * struct sof_ipc4_timestamp_info - IPC4 timestamp info - * @host_copier: the host copier of the pcm stream - * @dai_copier: the dai copier of the pcm stream - * @stream_start_offset: reported by fw in memory window - * @llp_offset: llp offset in memory window - */ -struct sof_ipc4_timestamp_info { - struct sof_ipc4_copier *host_copier; - struct sof_ipc4_copier *dai_copier; - u64 stream_start_offset; - u32 llp_offset; -}; - extern const struct sof_ipc_fw_loader_ops ipc4_loader_ops; extern const struct sof_ipc_tplg_ops ipc4_tplg_ops; extern const struct sof_ipc_tplg_control_ops tplg_ipc4_control_ops; diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index da4a83afb87a..5cca05842126 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -412,8 +412,9 @@ static int sof_ipc4_widget_setup_pcm(struct snd_sof_widget *swidget) struct sof_ipc4_available_audio_format *available_fmt; struct snd_soc_component *scomp = swidget->scomp; struct sof_ipc4_copier *ipc4_copier; + struct snd_sof_pcm *spcm; int node_type = 0; - int ret; + int ret, dir; ipc4_copier = kzalloc(sizeof(*ipc4_copier), GFP_KERNEL); if (!ipc4_copier) @@ -447,6 +448,25 @@ static int sof_ipc4_widget_setup_pcm(struct snd_sof_widget *swidget) } dev_dbg(scomp->dev, "host copier '%s' node_type %u\n", swidget->widget->name, node_type); + spcm = snd_sof_find_spcm_comp(scomp, swidget->comp_id, &dir); + if (!spcm) + goto skip_gtw_cfg; + + if (dir == SNDRV_PCM_STREAM_PLAYBACK) { + struct snd_sof_pcm_stream *sps = &spcm->stream[dir]; + + sof_update_ipc_object(scomp, &sps->dsp_max_burst_size_in_ms, + SOF_COPIER_DEEP_BUFFER_TOKENS, + swidget->tuples, + swidget->num_tuples, sizeof(u32), 1); + /* Set default DMA buffer size if it is not specified in topology */ + if (!sps->dsp_max_burst_size_in_ms) + sps->dsp_max_burst_size_in_ms = SOF_IPC4_MIN_DMA_BUFFER_SIZE; + } else { + /* Capture data is copied from DSP to host in 1ms bursts */ + spcm->stream[dir].dsp_max_burst_size_in_ms = 1; + } + skip_gtw_cfg: ipc4_copier->gtw_attr = kzalloc(sizeof(*ipc4_copier->gtw_attr), GFP_KERNEL); if (!ipc4_copier->gtw_attr) { @@ -1356,6 +1376,7 @@ static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_s int sample_rate, channel_count; int bit_depth, ret; u32 nhlt_type; + int dev_type = 0; /* convert to NHLT type */ switch (linktype) { @@ -1371,18 +1392,30 @@ static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_s &bit_depth); if (ret < 0) return ret; + + /* + * We need to know the type of the external device attached to a SSP + * port to retrieve the blob from NHLT. However, device type is not + * specified in topology. + * Query the type for the port and then pass that information back + * to the blob lookup function. + */ + dev_type = intel_nhlt_ssp_device_type(sdev->dev, ipc4_data->nhlt, + dai_index); + if (dev_type < 0) + return dev_type; break; default: return 0; } - dev_dbg(sdev->dev, "dai index %d nhlt type %d direction %d\n", - dai_index, nhlt_type, dir); + dev_dbg(sdev->dev, "dai index %d nhlt type %d direction %d dev type %d\n", + dai_index, nhlt_type, dir, dev_type); /* find NHLT blob with matching params */ cfg = intel_nhlt_get_endpoint_blob(sdev->dev, ipc4_data->nhlt, dai_index, nhlt_type, bit_depth, bit_depth, channel_count, sample_rate, - dir, 0); + dir, dev_type); if (!cfg) { dev_err(sdev->dev, diff --git a/sound/soc/sof/ops.h b/sound/soc/sof/ops.h index 6cf21e829e07..3cd748e13460 100644 --- a/sound/soc/sof/ops.h +++ b/sound/soc/sof/ops.h @@ -523,12 +523,26 @@ static inline int snd_sof_pcm_platform_ack(struct snd_sof_dev *sdev, return 0; } -static inline u64 snd_sof_pcm_get_stream_position(struct snd_sof_dev *sdev, - struct snd_soc_component *component, - struct snd_pcm_substream *substream) +static inline u64 +snd_sof_pcm_get_dai_frame_counter(struct snd_sof_dev *sdev, + struct snd_soc_component *component, + struct snd_pcm_substream *substream) { - if (sof_ops(sdev) && sof_ops(sdev)->get_stream_position) - return sof_ops(sdev)->get_stream_position(sdev, component, substream); + if (sof_ops(sdev) && sof_ops(sdev)->get_dai_frame_counter) + return sof_ops(sdev)->get_dai_frame_counter(sdev, component, + substream); + + return 0; +} + +static inline u64 +snd_sof_pcm_get_host_byte_counter(struct snd_sof_dev *sdev, + struct snd_soc_component *component, + struct snd_pcm_substream *substream) +{ + if (sof_ops(sdev) && sof_ops(sdev)->get_host_byte_counter) + return sof_ops(sdev)->get_host_byte_counter(sdev, component, + substream); return 0; } diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index 33d576b17647..f03cee94bce6 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -388,13 +388,21 @@ static snd_pcm_uframes_t sof_pcm_pointer(struct snd_soc_component *component, { struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); + const struct sof_ipc_pcm_ops *pcm_ops = sof_ipc_get_ops(sdev, pcm); struct snd_sof_pcm *spcm; snd_pcm_uframes_t host, dai; + int ret = -EOPNOTSUPP; /* nothing to do for BE */ if (rtd->dai_link->no_pcm) return 0; + if (pcm_ops && pcm_ops->pointer) + ret = pcm_ops->pointer(component, substream, &host); + + if (ret != -EOPNOTSUPP) + return ret ? ret : host; + /* use dsp ops pointer callback directly if set */ if (sof_ops(sdev)->pcm_pointer) return sof_ops(sdev)->pcm_pointer(sdev, substream); diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h index 9ea2ac5adac7..86bbb531e142 100644 --- a/sound/soc/sof/sof-audio.h +++ b/sound/soc/sof/sof-audio.h @@ -103,7 +103,10 @@ struct snd_sof_dai_config_data { * additional memory in the SOF PCM stream structure * @pcm_free: Function pointer for PCM free that can be used for freeing any * additional memory in the SOF PCM stream structure - * @delay: Function pointer for pcm delay calculation + * @pointer: Function pointer for pcm pointer + * Note: the @pointer callback may return -EOPNOTSUPP which should be + * handled in a same way as if the callback is not provided + * @delay: Function pointer for pcm delay reporting * @reset_hw_params_during_stop: Flag indicating whether the hw_params should be reset during the * STOP pcm trigger * @ipc_first_on_start: Send IPC before invoking platform trigger during @@ -124,6 +127,9 @@ struct sof_ipc_pcm_ops { int (*dai_link_fixup)(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params); int (*pcm_setup)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm); void (*pcm_free)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm); + int (*pointer)(struct snd_soc_component *component, + struct snd_pcm_substream *substream, + snd_pcm_uframes_t *pointer); snd_pcm_sframes_t (*delay)(struct snd_soc_component *component, struct snd_pcm_substream *substream); bool reset_hw_params_during_stop; @@ -322,6 +328,7 @@ struct snd_sof_pcm_stream { struct work_struct period_elapsed_work; struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */ bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */ + unsigned int dsp_max_burst_size_in_ms; /* The maximum size of the host DMA burst in ms */ /* * flag to indicate that the DSP pipelines should be kept * active or not while suspending the stream diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h index d453a4ce3b21..d3c436f82604 100644 --- a/sound/soc/sof/sof-priv.h +++ b/sound/soc/sof/sof-priv.h @@ -262,13 +262,25 @@ struct snd_sof_dsp_ops { int (*pcm_ack)(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream); /* optional */ /* - * optional callback to retrieve the link DMA position for the substream - * when the position is not reported in the shared SRAM windows but - * instead from a host-accessible hardware counter. + * optional callback to retrieve the number of frames left/arrived from/to + * the DSP on the DAI side (link/codec/DMIC/etc). + * + * The callback is used when the firmware does not provide this information + * via the shared SRAM window and it can be retrieved by host. */ - u64 (*get_stream_position)(struct snd_sof_dev *sdev, - struct snd_soc_component *component, - struct snd_pcm_substream *substream); /* optional */ + u64 (*get_dai_frame_counter)(struct snd_sof_dev *sdev, + struct snd_soc_component *component, + struct snd_pcm_substream *substream); /* optional */ + + /* + * Optional callback to retrieve the number of bytes left/arrived from/to + * the DSP on the host side (bytes between host ALSA buffer and DSP). + * + * The callback is needed for ALSA delay reporting. + */ + u64 (*get_host_byte_counter)(struct snd_sof_dev *sdev, + struct snd_soc_component *component, + struct snd_pcm_substream *substream); /* optional */ /* host read DSP stream data */ int (*ipc_msg_data)(struct snd_sof_dev *sdev, diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index b67617b68e50..f4437015d43a 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -202,7 +202,7 @@ int line6_send_raw_message_async(struct usb_line6 *line6, const char *buffer, struct urb *urb; /* create message: */ - msg = kmalloc(sizeof(struct message), GFP_ATOMIC); + msg = kzalloc(sizeof(struct message), GFP_ATOMIC); if (msg == NULL) return -ENOMEM; @@ -688,7 +688,7 @@ static int line6_init_cap_control(struct usb_line6 *line6) int ret; /* initialize USB buffers: */ - line6->buffer_listen = kmalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL); + line6->buffer_listen = kzalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL); if (!line6->buffer_listen) return -ENOMEM; @@ -697,7 +697,7 @@ static int line6_init_cap_control(struct usb_line6 *line6) return -ENOMEM; if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) { - line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL); + line6->buffer_message = kzalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL); if (!line6->buffer_message) return -ENOMEM; diff --git a/tools/Makefile b/tools/Makefile index 37e9f6804832..276f5d0d53a4 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -11,7 +11,6 @@ help: @echo '' @echo ' acpi - ACPI tools' @echo ' bpf - misc BPF tools' - @echo ' cgroup - cgroup tools' @echo ' counter - counter tools' @echo ' cpupower - a tool for all things x86 CPU power' @echo ' debugging - tools for debugging' @@ -69,7 +68,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE +counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE $(call descend,$@) bpf/%: FORCE @@ -116,7 +115,7 @@ freefall: FORCE kvm_stat: FORCE $(call descend,kvm/$@) -all: acpi cgroup counter cpupower gpio hv firewire \ +all: acpi counter cpupower gpio hv firewire \ perf selftests bootconfig spi turbostat usb \ virtio mm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ @@ -128,7 +127,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: +counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: $(call descend,$(@:_install=),install) selftests_install: @@ -155,7 +154,7 @@ freefall_install: kvm_stat_install: $(call descend,kvm/$(@:_install=),install) -install: acpi_install cgroup_install counter_install cpupower_install gpio_install \ +install: acpi_install counter_install cpupower_install gpio_install \ hv_install firewire_install iio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install mm_install bpf_install x86_energy_perf_policy_install \ @@ -169,7 +168,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: +counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: $(call descend,$(@:_clean=),clean) libapi_clean: @@ -209,7 +208,7 @@ freefall_clean: build_clean: $(call descend,build,clean) -clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \ +clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean \ diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 7c7493cb571f..52f076afeb96 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -61,6 +61,7 @@ #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_IMP_AMPERE 0xC0 +#define ARM_CPU_IMP_MICROSOFT 0x6D #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -135,6 +136,8 @@ #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -193,6 +196,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 89d2fc872d9f..964df31da975 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -37,9 +37,7 @@ #include #include -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -76,11 +74,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ - KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ - KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 @@ -162,6 +160,11 @@ struct kvm_sync_regs { __u64 device_irq_level; }; +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 9f18fa090f1f..1691297a766a 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -28,7 +28,6 @@ #define __KVM_HAVE_PPC_SMT #define __KVM_HAVE_IRQCHIP #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_GUEST_DEBUG /* Not always available, but if it is, this is the correct offset. */ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq { #define KVM_XIVE_TIMA_PAGE_OFFSET 0 #define KVM_XIVE_ESB_PAGE_OFFSET 4 +/* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + +struct kvm_ppc_pvinfo { + /* out */ + __u32 flags; + __u32 hcall[4]; + __u8 pad[108]; +}; + +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index abe926d43cbe..05eaf6db3ad4 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -12,7 +12,320 @@ #include #define __KVM_S390 -#define __KVM_HAVE_GUEST_DEBUG + +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; + +#define KVM_S390_CMMA_PEEK (1 << 0) + +/** + * kvm_s390_cmma_log - Used for CMMA migration. + * + * Used both for input and output. + * + * @start_gfn: Guest page number to start from. + * @count: Size of the result buffer. + * @flags: Control operation mode via KVM_S390_CMMA_* flags + * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty + * pages are still remaining. + * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set + * in the PGSTE. + * @values: Pointer to the values buffer. + * + * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. + */ +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + +#define KVM_S390_RESET_POR 1 +#define KVM_S390_RESET_CLEAR 2 +#define KVM_S390_RESET_SUBSYSTEM 4 +#define KVM_S390_RESET_CPU_INIT 8 +#define KVM_S390_RESET_IPL 16 + +/* for KVM_S390_MEM_OP */ +struct kvm_s390_mem_op { + /* in */ + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if cmpxchg flag unset */ + }; + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* ignored */ + }; +}; +/* types for kvm_s390_mem_op->op */ +#define KVM_S390_MEMOP_LOGICAL_READ 0 +#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +#define KVM_S390_MEMOP_SIDA_READ 2 +#define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 + +/* flags for kvm_s390_mem_op->flags */ +#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + +/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) + +struct kvm_s390_psw { + __u64 mask; + __u64 addr; +}; + +/* valid values for type in kvm_s390_interrupt */ +#define KVM_S390_SIGP_STOP 0xfffe0000u +#define KVM_S390_PROGRAM_INT 0xfffe0001u +#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +#define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +#define KVM_S390_MCHK 0xfffe1000u +#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +#define KVM_S390_INT_CPU_TIMER 0xffff1005u +#define KVM_S390_INT_VIRTIO 0xffff2603u +#define KVM_S390_INT_SERVICE 0xffff2401u +#define KVM_S390_INT_EMERGENCY 0xffff1201u +#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +/* Anything below 0xfffe0000u is taken by INT_IO */ +#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ + (((schid)) | \ + ((ssid) << 16) | \ + ((cssid) << 18) | \ + ((ai) << 26)) +#define KVM_S390_INT_IO_MIN 0x00000000u +#define KVM_S390_INT_IO_MAX 0xfffdffffu +#define KVM_S390_INT_IO_AI_MASK 0x04000000u + + +struct kvm_s390_interrupt { + __u32 type; + __u32 parm; + __u64 parm64; +}; + +struct kvm_s390_io_info { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; +}; + +struct kvm_s390_ext_info { + __u32 ext_params; + __u32 pad; + __u64 ext_params2; +}; + +struct kvm_s390_pgm_info { + __u64 trans_exc_code; + __u64 mon_code; + __u64 per_address; + __u32 data_exc_code; + __u16 code; + __u16 mon_class_nr; + __u8 per_code; + __u8 per_atmid; + __u8 exc_access_id; + __u8 per_access_id; + __u8 op_access_id; +#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 + __u8 flags; + __u8 pad[2]; +}; + +struct kvm_s390_prefix_info { + __u32 address; +}; + +struct kvm_s390_extcall_info { + __u16 code; +}; + +struct kvm_s390_emerg_info { + __u16 code; +}; + +#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +struct kvm_s390_stop_info { + __u32 flags; +}; + +struct kvm_s390_mchk_info { + __u64 cr14; + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 pad; + __u8 fixed_logout[16]; +}; + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; /* will stay unused for compatibility reasons */ + __u32 len; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ +}; + +struct kvm_s390_ucas_mapping { + __u64 user_addr; + __u64 vcpu_addr; + __u64 length; +}; + +struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +}; + +struct kvm_s390_pv_unp { + __u64 addr; + __u64 size; + __u64 tweak; +}; + +enum pv_cmd_dmp_id { + KVM_PV_DUMP_INIT, + KVM_PV_DUMP_CONFIG_STOR_STATE, + KVM_PV_DUMP_COMPLETE, + KVM_PV_DUMP_CPU, +}; + +struct kvm_s390_pv_dmp { + __u64 subcmd; + __u64 buff_addr; + __u64 buff_len; + __u64 gaddr; /* For dump storage state */ + __u64 reserved[4]; +}; + +enum pv_cmd_info_id { + KVM_PV_INFO_VM, + KVM_PV_INFO_DUMP, +}; + +struct kvm_s390_pv_info_dump { + __u64 dump_cpu_buffer_len; + __u64 dump_config_mem_buffer_per_1m; + __u64 dump_config_finalize_len; +}; + +struct kvm_s390_pv_info_vm { + __u64 inst_calls_list[4]; + __u64 max_cpus; + __u64 max_guests; + __u64 max_guest_addr; + __u64 feature_indication; +}; + +struct kvm_s390_pv_info_header { + __u32 id; + __u32 len_max; + __u32 len_written; + __u32 reserved; +}; + +struct kvm_s390_pv_info { + struct kvm_s390_pv_info_header header; + union { + struct kvm_s390_pv_info_dump dump; + struct kvm_s390_pv_info_vm vm; + }; +}; + +enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, + KVM_PV_SET_SEC_PARMS, + KVM_PV_UNPACK, + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, + KVM_PV_INFO, + KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, +}; + +struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. Must be 0 for now */ + __u32 reserved[3]; +}; + +struct kvm_s390_zpci_op { + /* in */ + __u32 fh; /* target device */ + __u8 op; /* operation to perform */ + __u8 pad[3]; + union { + /* for KVM_S390_ZPCIOP_REG_AEN */ + struct { + __u64 ibv; /* Guest addr of interrupt bit vector */ + __u64 sb; /* Guest addr of summary bit */ + __u32 flags; + __u32 noi; /* Number of interrupts */ + __u8 isc; /* Guest interrupt subclass */ + __u8 sbo; /* Offset of guest summary bit vector */ + __u16 pad; + } reg_aen; + __u64 reserved[8]; + } u; +}; + +/* types for kvm_s390_zpci_op->op */ +#define KVM_S390_ZPCIOP_REG_AEN 0 +#define KVM_S390_ZPCIOP_DEREG_AEN 1 + +/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) /* Device control API: s390-specific devices */ #define KVM_DEV_FLIC_GET_ALL_IRQS 1 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 25160d26764b..a38f8f9ba657 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -81,10 +81,8 @@ #define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ #define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ - -/* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ @@ -97,7 +95,7 @@ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ @@ -461,6 +459,14 @@ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ + /* * BUG word(s) */ @@ -508,4 +514,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 1f23960d2b06..c492bdc97b05 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -123,6 +123,12 @@ # define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31)) #endif +#ifdef CONFIG_KVM_AMD_SEV +#define DISABLE_SEV_SNP 0 +#else +#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -147,8 +153,9 @@ DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 (DISABLE_IBT) -#define DISABLED_MASK19 0 +#define DISABLED_MASK19 (DISABLE_SEV_SNP) #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h index 3f73ac3ed3a0..d18bfb238f66 100644 --- a/tools/arch/x86/include/asm/irq_vectors.h +++ b/tools/arch/x86/include/asm/irq_vectors.h @@ -84,11 +84,9 @@ #define HYPERVISOR_CALLBACK_VECTOR 0xf3 /* Vector for KVM to deliver posted interrupt IPI */ -#if IS_ENABLED(CONFIG_KVM) #define POSTED_INTR_VECTOR 0xf2 #define POSTED_INTR_WAKEUP_VECTOR 0xf1 #define POSTED_INTR_NESTED_VECTOR 0xf0 -#endif #define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 1f9dc9bd13eb..05956bd8bacf 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -176,6 +176,14 @@ * CPU is not vulnerable to Gather * Data Sampling (GDS). */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR @@ -605,34 +613,47 @@ #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 -#define MSR_AMD64_SEV_ES_ENABLED_BIT 1 -#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) +#define MSR_AMD64_SEV_ES_ENABLED_BIT 1 #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) +#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) - -/* SNP feature bits enabled by the hypervisor */ -#define MSR_AMD64_SNP_VTOM BIT_ULL(3) -#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4) -#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5) -#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6) -#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7) -#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8) -#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9) -#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10) -#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11) -#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12) -#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14) -#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16) -#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17) - -/* SNP feature bits reserved for future use. */ -#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13) -#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15) -#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18) +#define MSR_AMD64_SNP_VTOM_BIT 3 +#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT) +#define MSR_AMD64_SNP_REFLECT_VC_BIT 4 +#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT) +#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5 +#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT) +#define MSR_AMD64_SNP_ALT_INJ_BIT 6 +#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT) +#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7 +#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT) +#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8 +#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT) +#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9 +#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT) +#define MSR_AMD64_SNP_VMPL_SSS_BIT 10 +#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT) +#define MSR_AMD64_SNP_SECURE_TSC_BIT 11 +#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT) +#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12 +#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT) +#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13) +#define MSR_AMD64_SNP_IBS_VIRT_BIT 14 +#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT) +#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15) +#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16 +#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) +#define MSR_AMD64_SNP_SMT_PROT_BIT 17 +#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) +#define MSR_AMD64_SNP_RESV_BIT 18 +#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +#define MSR_AMD64_RMP_BASE 0xc0010132 +#define MSR_AMD64_RMP_END 0xc0010133 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -719,8 +740,15 @@ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d #define MSR_AMD64_SYSCFG 0xc0010010 -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 #define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24 +#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT) +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25 +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT) +#define MSR_AMD64_SYSCFG_MFDM_BIT 19 +#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT) + #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 7ba1726b71c7..e9187ddd3d1f 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -99,6 +99,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index a448d0964fc0..ef11aa4cab42 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -7,6 +7,8 @@ * */ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 @@ -49,7 +50,6 @@ #define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_XSAVE #define __KVM_HAVE_XCRS -#define __KVM_HAVE_READONLY_MEM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -526,9 +526,301 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 -#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) +/* for KVM_CAP_MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; + +/* for KVM_CAP_XEN_HVM */ +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + __u8 runstate_update_flag; + union { + __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) + __u64 hva; + } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. + */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u32 xen_version; + __u64 pad[8]; + } u; +}; + + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) + __u64 hva; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; + __u8 vector; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u32 pad0; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u32 pad0; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u32 pad1; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u32 pad2; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u32 pad3; + __u64 session_uaddr; + __u32 session_len; + __u32 pad4; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + /* * Masked event layout. * Bits Description @@ -549,10 +841,10 @@ struct kvm_pmu_event_filter { ((__u64)(!!(exclude)) << 55)) #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ - (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) -#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) -#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) -#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) + (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ @@ -560,7 +852,7 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ /* x86-specific KVM_EXIT_HYPERCALL flags. */ -#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) +#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) #define KVM_X86_DEFAULT_VM 0 #define KVM_X86_SW_PROTECTED_VM 1 diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 4fa4ade1ce74..540c0f2c4fda 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -121,7 +121,7 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) int i, n; /* recognize hard coded LLVM section name */ - if (strcmp(sec_name, ".arena.1") == 0) { + if (strcmp(sec_name, ".addr_space.1") == 0) { /* this is the name to use in skeleton */ snprintf(buf, buf_sz, "arena"); return true; diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 318e2dad27e0..ae57bf69ad4a 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -76,6 +76,12 @@ enum { DNS }; +enum { + IPV4 = 1, + IPV6, + IP_TYPE_MAX +}; + static int in_hand_shake; static char *os_name = ""; @@ -102,6 +108,11 @@ static struct utsname uts_buf; #define MAX_FILE_NAME 100 #define ENTRIES_PER_BLOCK 50 +/* + * Change this entry if the number of addresses increases in future + */ +#define MAX_IP_ENTRIES 64 +#define OUTSTR_BUF_SIZE ((INET6_ADDRSTRLEN + 1) * MAX_IP_ENTRIES) struct kvp_record { char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; @@ -1171,6 +1182,18 @@ static int process_ip_string(FILE *f, char *ip_string, int type) return 0; } +int ip_version_check(const char *input_addr) +{ + struct in6_addr addr; + + if (inet_pton(AF_INET, input_addr, &addr)) + return IPV4; + else if (inet_pton(AF_INET6, input_addr, &addr)) + return IPV6; + + return -EINVAL; +} + /* * Only IPv4 subnet strings needs to be converted to plen * For IPv6 the subnet is already privided in plen format @@ -1197,14 +1220,75 @@ static int kvp_subnet_to_plen(char *subnet_addr_str) return plen; } +static int process_dns_gateway_nm(FILE *f, char *ip_string, int type, + int ip_sec) +{ + char addr[INET6_ADDRSTRLEN], *output_str; + int ip_offset = 0, error = 0, ip_ver; + char *param_name; + + if (type == DNS) + param_name = "dns"; + else if (type == GATEWAY) + param_name = "gateway"; + else + return -EINVAL; + + output_str = (char *)calloc(OUTSTR_BUF_SIZE, sizeof(char)); + if (!output_str) + return -ENOMEM; + + while (1) { + memset(addr, 0, sizeof(addr)); + + if (!parse_ip_val_buffer(ip_string, &ip_offset, addr, + (MAX_IP_ADDR_SIZE * 2))) + break; + + ip_ver = ip_version_check(addr); + if (ip_ver < 0) + continue; + + if ((ip_ver == IPV4 && ip_sec == IPV4) || + (ip_ver == IPV6 && ip_sec == IPV6)) { + /* + * do a bound check to avoid out-of bound writes + */ + if ((OUTSTR_BUF_SIZE - strlen(output_str)) > + (strlen(addr) + 1)) { + strncat(output_str, addr, + OUTSTR_BUF_SIZE - + strlen(output_str) - 1); + strncat(output_str, ",", + OUTSTR_BUF_SIZE - + strlen(output_str) - 1); + } + } else { + continue; + } + } + + if (strlen(output_str)) { + /* + * This is to get rid of that extra comma character + * in the end of the string + */ + output_str[strlen(output_str) - 1] = '\0'; + error = fprintf(f, "%s=%s\n", param_name, output_str); + } + + free(output_str); + return error; +} + static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet, - int is_ipv6) + int ip_sec) { char addr[INET6_ADDRSTRLEN]; char subnet_addr[INET6_ADDRSTRLEN]; - int error, i = 0; + int error = 0, i = 0; int ip_offset = 0, subnet_offset = 0; - int plen; + int plen, ip_ver; memset(addr, 0, sizeof(addr)); memset(subnet_addr, 0, sizeof(subnet_addr)); @@ -1216,10 +1300,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet, subnet_addr, (MAX_IP_ADDR_SIZE * 2))) { - if (!is_ipv6) + ip_ver = ip_version_check(addr); + if (ip_ver < 0) + continue; + + if (ip_ver == IPV4 && ip_sec == IPV4) plen = kvp_subnet_to_plen((char *)subnet_addr); - else + else if (ip_ver == IPV6 && ip_sec == IPV6) plen = atoi(subnet_addr); + else + continue; if (plen < 0) return plen; @@ -1233,17 +1323,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet, memset(subnet_addr, 0, sizeof(subnet_addr)); } - return 0; + return error; } static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) { - int error = 0; + int error = 0, ip_ver; char if_filename[PATH_MAX]; char nm_filename[PATH_MAX]; FILE *ifcfg_file, *nmfile; char cmd[PATH_MAX]; - int is_ipv6 = 0; char *mac_addr; int str_len; @@ -1421,52 +1510,94 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error) goto setval_error; - if (new_val->addr_family & ADDR_FAMILY_IPV6) { - error = fprintf(nmfile, "\n[ipv6]\n"); - if (error < 0) - goto setval_error; - is_ipv6 = 1; - } else { - error = fprintf(nmfile, "\n[ipv4]\n"); - if (error < 0) - goto setval_error; - } - /* * Now we populate the keyfile format + * + * The keyfile format expects the IPv6 and IPv4 configuration in + * different sections. Therefore we iterate through the list twice, + * once to populate the IPv4 section and the next time for IPv6 */ + ip_ver = IPV4; + do { + if (ip_ver == IPV4) { + error = fprintf(nmfile, "\n[ipv4]\n"); + if (error < 0) + goto setval_error; + } else { + error = fprintf(nmfile, "\n[ipv6]\n"); + if (error < 0) + goto setval_error; + } - if (new_val->dhcp_enabled) { - error = kvp_write_file(nmfile, "method", "", "auto"); + /* + * Write the configuration for ipaddress, netmask, gateway and + * name services + */ + error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr, + (char *)new_val->sub_net, + ip_ver); if (error < 0) goto setval_error; - } else { - error = kvp_write_file(nmfile, "method", "", "manual"); + + /* + * As dhcp_enabled is only valid for ipv4, we do not set dhcp + * methods for ipv6 based on dhcp_enabled flag. + * + * For ipv4, set method to manual only when dhcp_enabled is + * false and specific ipv4 addresses are configured. If neither + * dhcp_enabled is true and no ipv4 addresses are configured, + * set method to 'disabled'. + * + * For ipv6, set method to manual when we configure ipv6 + * addresses. Otherwise set method to 'auto' so that SLAAC from + * RA may be used. + */ + if (ip_ver == IPV4) { + if (new_val->dhcp_enabled) { + error = kvp_write_file(nmfile, "method", "", + "auto"); + if (error < 0) + goto setval_error; + } else if (error) { + error = kvp_write_file(nmfile, "method", "", + "manual"); + if (error < 0) + goto setval_error; + } else { + error = kvp_write_file(nmfile, "method", "", + "disabled"); + if (error < 0) + goto setval_error; + } + } else if (ip_ver == IPV6) { + if (error) { + error = kvp_write_file(nmfile, "method", "", + "manual"); + if (error < 0) + goto setval_error; + } else { + error = kvp_write_file(nmfile, "method", "", + "auto"); + if (error < 0) + goto setval_error; + } + } + + error = process_dns_gateway_nm(nmfile, + (char *)new_val->gate_way, + GATEWAY, ip_ver); if (error < 0) goto setval_error; - } - /* - * Write the configuration for ipaddress, netmask, gateway and - * name services - */ - error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr, - (char *)new_val->sub_net, is_ipv6); - if (error < 0) - goto setval_error; - - /* we do not want ipv4 addresses in ipv6 section and vice versa */ - if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) { - error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); + error = process_dns_gateway_nm(nmfile, + (char *)new_val->dns_addr, DNS, + ip_ver); if (error < 0) goto setval_error; - } - if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) { - error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); - if (error < 0) - goto setval_error; - } + ip_ver++; + } while (ip_ver < IP_TYPE_MAX); + fclose(nmfile); fclose(ifcfg_file); diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index 03f721a8a2b1..54ccccf96e21 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -5,12 +5,12 @@ #include /** - * __fls - find last (most-significant) set bit in a long word + * generic___fls - find last (most-significant) set bit in a long word * @word: the word to search * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned long __fls(unsigned long word) +static __always_inline unsigned long generic___fls(unsigned long word) { int num = BITS_PER_LONG - 1; @@ -41,4 +41,8 @@ static __always_inline unsigned long __fls(unsigned long word) return num; } +#ifndef __HAVE_ARCH___FLS +#define __fls(word) generic___fls(word) +#endif + #endif /* _ASM_GENERIC_BITOPS___FLS_H_ */ diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index b168bb10e1be..26f3ce1dd6e4 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -3,14 +3,14 @@ #define _ASM_GENERIC_BITOPS_FLS_H_ /** - * fls - find last (most-significant) bit set + * generic_fls - find last (most-significant) bit set * @x: the word to search * * This is defined the same way as ffs. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static __always_inline int fls(unsigned int x) +static __always_inline int generic_fls(unsigned int x) { int r = 32; @@ -39,4 +39,8 @@ static __always_inline int fls(unsigned int x) return r; } +#ifndef __HAVE_ARCH_FLS +#define fls(x) generic_fls(x) +#endif + #endif /* _ASM_GENERIC_BITOPS_FLS_H_ */ diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 72535f00572f..72ea363d434d 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -3,6 +3,8 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#include /* for u32 */ + struct btf_id_set { u32 cnt; u32 ids[]; diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 4b0673bf52c2..07cfad817d53 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index f3c82ab5b14c..7d73da098047 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count) { } +static inline int early_pfn_to_nid(unsigned long pfn) +{ + return 0; +} + #endif diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h new file mode 100644 index 000000000000..9c8f17a41ce8 --- /dev/null +++ b/tools/include/linux/panic.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_PANIC_H +#define _TOOLS_LINUX_PANIC_H + +#include +#include +#include + +static inline void panic(const char *fmt, ...) +{ + va_list argp; + + va_start(argp, fmt); + vfprintf(stderr, fmt, argp); + va_end(argp); + exit(-1); +} + +#endif diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index fd4f9574d177..2ee338860b7e 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -3013,6 +3013,7 @@ struct drm_i915_query_item { * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions) * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`) * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info) + * - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version) */ __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 @@ -3021,6 +3022,7 @@ struct drm_i915_query_item { #define DRM_I915_QUERY_MEMORY_REGIONS 4 #define DRM_I915_QUERY_HWCONFIG_BLOB 5 #define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6 +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION 7 /* Must be kept compact -- no holes and well documented */ /** @@ -3566,6 +3568,20 @@ struct drm_i915_query_memory_regions { struct drm_i915_memory_region_info regions[]; }; +/** + * struct drm_i915_query_guc_submission_version - query GuC submission interface version + */ +struct drm_i915_query_guc_submission_version { + /** @branch: Firmware branch version. */ + __u32 branch; + /** @major: Firmware major version. */ + __u32 major; + /** @minor: Firmware minor version. */ + __u32 minor; + /** @patch: Firmware patch version. */ + __u32 patch; +}; + /** * DOC: GuC HWCONFIG blob uAPI * diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 48ad69f7722e..45e4e64fd664 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -64,6 +64,24 @@ struct fstrim_range { __u64 minlen; }; +/* + * We include a length field because some filesystems (vfat) have an identifier + * that we do want to expose as a UUID, but doesn't have the standard length. + * + * We use a fixed size buffer beacuse this interface will, by fiat, never + * support "UUIDs" longer than 16 bytes; we don't want to force all downstream + * users to have to deal with that. + */ +struct fsuuid2 { + __u8 len; + __u8 uuid[16]; +}; + +struct fs_sysfs_path { + __u8 len; + __u8 name[128]; +}; + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1 @@ -215,6 +233,13 @@ struct fsxattr { #define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) #define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX]) #define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX]) +/* Returns the external filesystem UUID, the same one blkid returns */ +#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2) +/* + * Returns the path component under /sys/fs/ that refers to this filesystem; + * also /sys/kernel/debug/ for filesystems with debugfs exports + */ +#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) @@ -301,9 +326,12 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO O_APPEND */ #define RWF_APPEND ((__force __kernel_rwf_t)0x00000010) +/* per-IO negation of O_APPEND */ +#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND) + RWF_APPEND | RWF_NOAPPEND) /* Pagemap ioctl */ #define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index c3308536482b..2190adbe3002 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -16,6 +16,11 @@ #define KVM_API_VERSION 12 +/* + * Backwards-compatible definitions. + */ +#define __KVM_HAVE_GUEST_DEBUG + /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -85,43 +90,6 @@ struct kvm_pit_config { #define KVM_PIT_SPEAKER_DUMMY 1 -struct kvm_s390_skeys { - __u64 start_gfn; - __u64 count; - __u64 skeydata_addr; - __u32 flags; - __u32 reserved[9]; -}; - -#define KVM_S390_CMMA_PEEK (1 << 0) - -/** - * kvm_s390_cmma_log - Used for CMMA migration. - * - * Used both for input and output. - * - * @start_gfn: Guest page number to start from. - * @count: Size of the result buffer. - * @flags: Control operation mode via KVM_S390_CMMA_* flags - * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty - * pages are still remaining. - * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set - * in the PGSTE. - * @values: Pointer to the values buffer. - * - * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. - */ -struct kvm_s390_cmma_log { - __u64 start_gfn; - __u32 count; - __u32 flags; - union { - __u64 remaining; - __u64 mask; - }; - __u64 values; -}; - struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 @@ -315,11 +283,6 @@ struct kvm_run { __u32 ipb; } s390_sieic; /* KVM_EXIT_S390_RESET */ -#define KVM_S390_RESET_POR 1 -#define KVM_S390_RESET_CLEAR 2 -#define KVM_S390_RESET_SUBSYSTEM 4 -#define KVM_S390_RESET_CPU_INIT 8 -#define KVM_S390_RESET_IPL 16 __u64 s390_reset_flags; /* KVM_EXIT_S390_UCONTROL */ struct { @@ -536,43 +499,6 @@ struct kvm_translation { __u8 pad[5]; }; -/* for KVM_S390_MEM_OP */ -struct kvm_s390_mem_op { - /* in */ - __u64 gaddr; /* the guest address */ - __u64 flags; /* flags */ - __u32 size; /* amount of bytes */ - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ - union { - struct { - __u8 ar; /* the access register number */ - __u8 key; /* access key, ignored if flag unset */ - __u8 pad1[6]; /* ignored */ - __u64 old_addr; /* ignored if cmpxchg flag unset */ - }; - __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* ignored */ - }; -}; -/* types for kvm_s390_mem_op->op */ -#define KVM_S390_MEMOP_LOGICAL_READ 0 -#define KVM_S390_MEMOP_LOGICAL_WRITE 1 -#define KVM_S390_MEMOP_SIDA_READ 2 -#define KVM_S390_MEMOP_SIDA_WRITE 3 -#define KVM_S390_MEMOP_ABSOLUTE_READ 4 -#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 -#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 - -/* flags for kvm_s390_mem_op->flags */ -#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) -#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) - -/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ -#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) -#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) - /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ @@ -637,124 +563,6 @@ struct kvm_mp_state { __u32 mp_state; }; -struct kvm_s390_psw { - __u64 mask; - __u64 addr; -}; - -/* valid values for type in kvm_s390_interrupt */ -#define KVM_S390_SIGP_STOP 0xfffe0000u -#define KVM_S390_PROGRAM_INT 0xfffe0001u -#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u -#define KVM_S390_RESTART 0xfffe0003u -#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u -#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u -#define KVM_S390_MCHK 0xfffe1000u -#define KVM_S390_INT_CLOCK_COMP 0xffff1004u -#define KVM_S390_INT_CPU_TIMER 0xffff1005u -#define KVM_S390_INT_VIRTIO 0xffff2603u -#define KVM_S390_INT_SERVICE 0xffff2401u -#define KVM_S390_INT_EMERGENCY 0xffff1201u -#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u -/* Anything below 0xfffe0000u is taken by INT_IO */ -#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ - (((schid)) | \ - ((ssid) << 16) | \ - ((cssid) << 18) | \ - ((ai) << 26)) -#define KVM_S390_INT_IO_MIN 0x00000000u -#define KVM_S390_INT_IO_MAX 0xfffdffffu -#define KVM_S390_INT_IO_AI_MASK 0x04000000u - - -struct kvm_s390_interrupt { - __u32 type; - __u32 parm; - __u64 parm64; -}; - -struct kvm_s390_io_info { - __u16 subchannel_id; - __u16 subchannel_nr; - __u32 io_int_parm; - __u32 io_int_word; -}; - -struct kvm_s390_ext_info { - __u32 ext_params; - __u32 pad; - __u64 ext_params2; -}; - -struct kvm_s390_pgm_info { - __u64 trans_exc_code; - __u64 mon_code; - __u64 per_address; - __u32 data_exc_code; - __u16 code; - __u16 mon_class_nr; - __u8 per_code; - __u8 per_atmid; - __u8 exc_access_id; - __u8 per_access_id; - __u8 op_access_id; -#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 -#define KVM_S390_PGM_FLAGS_ILC_0 0x02 -#define KVM_S390_PGM_FLAGS_ILC_1 0x04 -#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 -#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 - __u8 flags; - __u8 pad[2]; -}; - -struct kvm_s390_prefix_info { - __u32 address; -}; - -struct kvm_s390_extcall_info { - __u16 code; -}; - -struct kvm_s390_emerg_info { - __u16 code; -}; - -#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 -struct kvm_s390_stop_info { - __u32 flags; -}; - -struct kvm_s390_mchk_info { - __u64 cr14; - __u64 mcic; - __u64 failing_storage_address; - __u32 ext_damage_code; - __u32 pad; - __u8 fixed_logout[16]; -}; - -struct kvm_s390_irq { - __u64 type; - union { - struct kvm_s390_io_info io; - struct kvm_s390_ext_info ext; - struct kvm_s390_pgm_info pgm; - struct kvm_s390_emerg_info emerg; - struct kvm_s390_extcall_info extcall; - struct kvm_s390_prefix_info prefix; - struct kvm_s390_stop_info stop; - struct kvm_s390_mchk_info mchk; - char reserved[64]; - } u; -}; - -struct kvm_s390_irq_state { - __u64 buf; - __u32 flags; /* will stay unused for compatibility reasons */ - __u32 len; - __u32 reserved[4]; /* will stay unused for compatibility reasons */ -}; - /* for KVM_SET_GUEST_DEBUG */ #define KVM_GUESTDBG_ENABLE 0x00000001 @@ -810,50 +618,6 @@ struct kvm_enable_cap { __u8 pad[64]; }; -/* for KVM_PPC_GET_PVINFO */ - -#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) - -struct kvm_ppc_pvinfo { - /* out */ - __u32 flags; - __u32 hcall[4]; - __u8 pad[108]; -}; - -/* for KVM_PPC_GET_SMMU_INFO */ -#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 - -struct kvm_ppc_one_page_size { - __u32 page_shift; /* Page shift (or 0) */ - __u32 pte_enc; /* Encoding in the HPTE (>>12) */ -}; - -struct kvm_ppc_one_seg_page_size { - __u32 page_shift; /* Base page shift of segment (or 0) */ - __u32 slb_enc; /* SLB encoding for BookS */ - struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; -}; - -#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 -#define KVM_PPC_1T_SEGMENTS 0x00000002 -#define KVM_PPC_NO_HASH 0x00000004 - -struct kvm_ppc_smmu_info { - __u64 flags; - __u32 slb_size; - __u16 data_keys; /* # storage keys supported for data */ - __u16 instr_keys; /* # storage keys supported for instructions */ - struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; -}; - -/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ -struct kvm_ppc_resize_hpt { - __u64 flags; - __u32 shift; - __u32 pad; -}; - #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ @@ -923,9 +687,7 @@ struct kvm_ppc_resize_hpt { /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 #define KVM_CAP_USER_NMI 22 -#ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 -#endif #ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif @@ -1156,8 +918,6 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing_irqchip { __u32 irqchip; __u32 pin; @@ -1222,42 +982,6 @@ struct kvm_irq_routing { struct kvm_irq_routing_entry entries[]; }; -#endif - -#ifdef KVM_CAP_MCE -/* x86 MCE */ -struct kvm_x86_mce { - __u64 status; - __u64 addr; - __u64 misc; - __u64 mcg_status; - __u8 bank; - __u8 pad1[7]; - __u64 pad2[3]; -}; -#endif - -#ifdef KVM_CAP_XEN_HVM -#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) -#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) -#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) -#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) -#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) -#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) -#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) -#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) - -struct kvm_xen_hvm_config { - __u32 flags; - __u32 msr; - __u64 blob_addr_32; - __u64 blob_addr_64; - __u8 blob_size_32; - __u8 blob_size_64; - __u8 pad2[30]; -}; -#endif - #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) /* * Available with KVM_CAP_IRQFD_RESAMPLE @@ -1442,11 +1166,6 @@ struct kvm_vfio_spapr_tce { struct kvm_userspace_memory_region2) /* enable ucontrol for s390 */ -struct kvm_s390_ucas_mapping { - __u64 user_addr; - __u64 vcpu_addr; - __u64 length; -}; #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) @@ -1641,89 +1360,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -struct kvm_s390_pv_sec_parm { - __u64 origin; - __u64 length; -}; - -struct kvm_s390_pv_unp { - __u64 addr; - __u64 size; - __u64 tweak; -}; - -enum pv_cmd_dmp_id { - KVM_PV_DUMP_INIT, - KVM_PV_DUMP_CONFIG_STOR_STATE, - KVM_PV_DUMP_COMPLETE, - KVM_PV_DUMP_CPU, -}; - -struct kvm_s390_pv_dmp { - __u64 subcmd; - __u64 buff_addr; - __u64 buff_len; - __u64 gaddr; /* For dump storage state */ - __u64 reserved[4]; -}; - -enum pv_cmd_info_id { - KVM_PV_INFO_VM, - KVM_PV_INFO_DUMP, -}; - -struct kvm_s390_pv_info_dump { - __u64 dump_cpu_buffer_len; - __u64 dump_config_mem_buffer_per_1m; - __u64 dump_config_finalize_len; -}; - -struct kvm_s390_pv_info_vm { - __u64 inst_calls_list[4]; - __u64 max_cpus; - __u64 max_guests; - __u64 max_guest_addr; - __u64 feature_indication; -}; - -struct kvm_s390_pv_info_header { - __u32 id; - __u32 len_max; - __u32 len_written; - __u32 reserved; -}; - -struct kvm_s390_pv_info { - struct kvm_s390_pv_info_header header; - union { - struct kvm_s390_pv_info_dump dump; - struct kvm_s390_pv_info_vm vm; - }; -}; - -enum pv_cmd_id { - KVM_PV_ENABLE, - KVM_PV_DISABLE, - KVM_PV_SET_SEC_PARMS, - KVM_PV_UNPACK, - KVM_PV_VERIFY, - KVM_PV_PREP_RESET, - KVM_PV_UNSHARE_ALL, - KVM_PV_INFO, - KVM_PV_DUMP, - KVM_PV_ASYNC_CLEANUP_PREPARE, - KVM_PV_ASYNC_CLEANUP_PERFORM, -}; - -struct kvm_pv_cmd { - __u32 cmd; /* Command to be executed */ - __u16 rc; /* Ultravisor return code */ - __u16 rrc; /* Ultravisor return reason code */ - __u64 data; /* Data or address */ - __u32 flags; /* flags for future extensions. Must be 0 for now */ - __u32 reserved[3]; -}; - /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) @@ -1737,58 +1373,6 @@ struct kvm_pv_cmd { #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) -struct kvm_xen_hvm_attr { - __u16 type; - __u16 pad[3]; - union { - __u8 long_mode; - __u8 vector; - __u8 runstate_update_flag; - struct { - __u64 gfn; -#define KVM_XEN_INVALID_GFN ((__u64)-1) - } shared_info; - struct { - __u32 send_port; - __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ - __u32 flags; -#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) -#define KVM_XEN_EVTCHN_UPDATE (1 << 1) -#define KVM_XEN_EVTCHN_RESET (1 << 2) - /* - * Events sent by the guest are either looped back to - * the guest itself (potentially on a different port#) - * or signalled via an eventfd. - */ - union { - struct { - __u32 port; - __u32 vcpu; - __u32 priority; - } port; - struct { - __u32 port; /* Zero for eventfd */ - __s32 fd; - } eventfd; - __u32 padding[4]; - } deliver; - } evtchn; - __u32 xen_version; - __u64 pad[8]; - } u; -}; - - -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 -#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 -#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ -#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 -#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ -#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 - /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) @@ -1799,242 +1383,6 @@ struct kvm_xen_hvm_attr { #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) -struct kvm_xen_vcpu_attr { - __u16 type; - __u16 pad[3]; - union { - __u64 gpa; -#define KVM_XEN_INVALID_GPA ((__u64)-1) - __u64 pad[8]; - struct { - __u64 state; - __u64 state_entry_time; - __u64 time_running; - __u64 time_runnable; - __u64 time_blocked; - __u64 time_offline; - } runstate; - __u32 vcpu_id; - struct { - __u32 port; - __u32 priority; - __u64 expires_ns; - } timer; - __u8 vector; - } u; -}; - -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 -#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 -#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 - -/* Secure Encrypted Virtualization command */ -enum sev_cmd_id { - /* Guest initialization commands */ - KVM_SEV_INIT = 0, - KVM_SEV_ES_INIT, - /* Guest launch commands */ - KVM_SEV_LAUNCH_START, - KVM_SEV_LAUNCH_UPDATE_DATA, - KVM_SEV_LAUNCH_UPDATE_VMSA, - KVM_SEV_LAUNCH_SECRET, - KVM_SEV_LAUNCH_MEASURE, - KVM_SEV_LAUNCH_FINISH, - /* Guest migration commands (outgoing) */ - KVM_SEV_SEND_START, - KVM_SEV_SEND_UPDATE_DATA, - KVM_SEV_SEND_UPDATE_VMSA, - KVM_SEV_SEND_FINISH, - /* Guest migration commands (incoming) */ - KVM_SEV_RECEIVE_START, - KVM_SEV_RECEIVE_UPDATE_DATA, - KVM_SEV_RECEIVE_UPDATE_VMSA, - KVM_SEV_RECEIVE_FINISH, - /* Guest status and debug commands */ - KVM_SEV_GUEST_STATUS, - KVM_SEV_DBG_DECRYPT, - KVM_SEV_DBG_ENCRYPT, - /* Guest certificates commands */ - KVM_SEV_CERT_EXPORT, - /* Attestation report */ - KVM_SEV_GET_ATTESTATION_REPORT, - /* Guest Migration Extension */ - KVM_SEV_SEND_CANCEL, - - KVM_SEV_NR_MAX, -}; - -struct kvm_sev_cmd { - __u32 id; - __u64 data; - __u32 error; - __u32 sev_fd; -}; - -struct kvm_sev_launch_start { - __u32 handle; - __u32 policy; - __u64 dh_uaddr; - __u32 dh_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_launch_update_data { - __u64 uaddr; - __u32 len; -}; - - -struct kvm_sev_launch_secret { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - -struct kvm_sev_launch_measure { - __u64 uaddr; - __u32 len; -}; - -struct kvm_sev_guest_status { - __u32 handle; - __u32 policy; - __u32 state; -}; - -struct kvm_sev_dbg { - __u64 src_uaddr; - __u64 dst_uaddr; - __u32 len; -}; - -struct kvm_sev_attestation_report { - __u8 mnonce[16]; - __u64 uaddr; - __u32 len; -}; - -struct kvm_sev_send_start { - __u32 policy; - __u64 pdh_cert_uaddr; - __u32 pdh_cert_len; - __u64 plat_certs_uaddr; - __u32 plat_certs_len; - __u64 amd_certs_uaddr; - __u32 amd_certs_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_send_update_data { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - -struct kvm_sev_receive_start { - __u32 handle; - __u32 policy; - __u64 pdh_uaddr; - __u32 pdh_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_receive_update_data { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) - -struct kvm_assigned_pci_dev { - __u32 assigned_dev_id; - __u32 busnr; - __u32 devfn; - __u32 flags; - __u32 segnr; - union { - __u32 reserved[11]; - }; -}; - -#define KVM_DEV_IRQ_HOST_INTX (1 << 0) -#define KVM_DEV_IRQ_HOST_MSI (1 << 1) -#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) - -#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) -#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) -#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) - -#define KVM_DEV_IRQ_HOST_MASK 0x00ff -#define KVM_DEV_IRQ_GUEST_MASK 0xff00 - -struct kvm_assigned_irq { - __u32 assigned_dev_id; - __u32 host_irq; /* ignored (legacy field) */ - __u32 guest_irq; - __u32 flags; - union { - __u32 reserved[12]; - }; -}; - -struct kvm_assigned_msix_nr { - __u32 assigned_dev_id; - __u16 entry_nr; - __u16 padding; -}; - -#define KVM_MAX_MSIX_PER_DEV 256 -struct kvm_assigned_msix_entry { - __u32 assigned_dev_id; - __u32 gsi; - __u16 entry; /* The index of entry in the MSI-X table */ - __u16 padding[3]; -}; - -#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) - -/* Available with KVM_CAP_ARM_USER_IRQ */ - -/* Bits for run->s.regs.device_irq_level */ -#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) -#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) -#define KVM_ARM_DEV_PMU (1 << 2) - -struct kvm_hyperv_eventfd { - __u32 conn_id; - __s32 fd; - __u32 flags; - __u32 padding[3]; -}; - -#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff -#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) - #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) @@ -2180,33 +1528,6 @@ struct kvm_stats_desc { /* Available with KVM_CAP_S390_ZPCI_OP */ #define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) -struct kvm_s390_zpci_op { - /* in */ - __u32 fh; /* target device */ - __u8 op; /* operation to perform */ - __u8 pad[3]; - union { - /* for KVM_S390_ZPCIOP_REG_AEN */ - struct { - __u64 ibv; /* Guest addr of interrupt bit vector */ - __u64 sb; /* Guest addr of summary bit */ - __u32 flags; - __u32 noi; /* Number of interrupts */ - __u8 isc; /* Guest interrupt subclass */ - __u8 sbo; /* Offset of guest summary bit vector */ - __u16 pad; - } reg_aen; - __u64 reserved[8]; - } u; -}; - -/* types for kvm_s390_zpci_op->op */ -#define KVM_S390_ZPCIOP_REG_AEN 0 -#define KVM_S390_ZPCIOP_DEREG_AEN 1 - -/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ -#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) - /* Available with KVM_CAP_MEMORY_ATTRIBUTES */ #define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index d5b9cfbd9cea..628d46a0da92 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -416,7 +416,7 @@ struct snd_pcm_hw_params { unsigned int rmask; /* W: requested masks */ unsigned int cmask; /* R: changed masks */ unsigned int info; /* R: Info flags for returned setup */ - unsigned int msbits; /* R: used most significant bits */ + unsigned int msbits; /* R: used most significant bits (in sample bit-width) */ unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index efab29b8935b..a2061fcd612d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -498,7 +498,7 @@ struct bpf_struct_ops { #define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" #define STRUCT_OPS_LINK_SEC ".struct_ops.link" -#define ARENA_SEC ".arena.1" +#define ARENA_SEC ".addr_space.1" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -1650,6 +1650,10 @@ static int sys_memfd_create(const char *name, unsigned flags) return syscall(__NR_memfd_create, name, flags); } +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + static int create_placeholder_fd(void) { int fd; @@ -5352,8 +5356,8 @@ retry: goto err_out; } if (map->def.type == BPF_MAP_TYPE_ARENA) { - map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map), - PROT_READ | PROT_WRITE, + map->mmaped = mmap((void *)(long)map->map_extra, + bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, map->fd, 0); if (map->mmaped == MAP_FAILED) { diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 6b7eb2d2aaf1..a451cbfbd781 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -228,8 +228,11 @@ class Type(SpecAttr): presence = '' for i in range(0, len(ref)): presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}" - if self.presence_type() == 'bit': - code.append(presence + ' = 1;') + # Every layer below last is a nest, so we know it uses bit presence + # last layer is "self" and may be a complex type + if i == len(ref) - 1 and self.presence_type() != 'bit': + continue + code.append(presence + ' = 1;') code += self._setter_lines(ri, member, presence) func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0b10ad008668..0a33d9195b7a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -585,7 +585,7 @@ static int add_dead_ends(struct objtool_file *file) struct section *rsec; struct reloc *reloc; struct instruction *insn; - unsigned long offset; + uint64_t offset; /* * Check for manually annotated dead ends. diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ec5e21932876..4790c735599b 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -970,7 +970,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, if (dso->annotate_warned) return -1; - if (not_annotated) { + if (not_annotated || !sym->annotate2) { err = symbol__annotate2(ms, evsel, &browser.arch); if (err) { char msg[BUFSIZ]; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ac002d907d81..50ca92255ff6 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2461,6 +2461,9 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, if (parch) *parch = arch; + if (!list_empty(¬es->src->source)) + return 0; + args.arch = arch; args.ms = *ms; if (annotate_opts.full_addr) diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index fb54bd38e7d0..d931a898c434 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -284,6 +284,7 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags) struct task_struct *curr; struct mm_struct___old *mm_old; struct mm_struct___new *mm_new; + struct sighand_struct *sighand; switch (flags) { case LCB_F_READ: /* rwsem */ @@ -305,7 +306,9 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags) break; case LCB_F_SPIN: /* spinlock */ curr = bpf_get_current_task_btf(); - if (&curr->sighand->siglock == (void *)lock) + sighand = curr->sighand; + + if (sighand && &sighand->siglock == (void *)lock) return LCD_F_SIGHAND_LOCK; break; default: diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 8f08c3fd498d..0d3672e5d9ed 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -67,6 +67,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--quiet\fP Do not decode and print the system configuration header information. .PP ++\fB--no-msr\fP Disable all the uses of the MSR driver. ++.PP ++\fB--no-perf\fP Disable all the uses of the perf API. ++.PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP \fB--num_iterations num\fP number of the measurement iterations. @@ -125,9 +129,17 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. .PP -\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms. +\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used. .PP -\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz. +\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used. +.PP +\fBGFXAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used. +.PP +\fBSAM%mc6\fP The percentage of time the SA Media is in the "module C6" state, mc6, during the measurement interval. From /sys/class/drm/card0/gt/gt1/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used. +.PP +\fBSAMMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used. +.PP +\fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used. .PP \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. .PP @@ -370,7 +382,7 @@ below the processor's base frequency. Busy% = MPERF_delta/TSC_delta -Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval +Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval Note that these calculations depend on TSC_delta, so they are not reliable during intervals when TSC_MHz is not running at the base frequency. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7a334377f92b..98256468e248 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3,7 +3,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel and AMD processors. * - * Copyright (c) 2023 Intel Corporation. + * Copyright (c) 2024 Intel Corporation. * Len Brown */ @@ -36,6 +36,8 @@ #include #include #include +#include +#include #define UNUSED(x) (void)(x) @@ -53,9 +55,13 @@ #define NAME_BYTES 20 #define PATH_BYTES 128 +#define MAX_NOFILE 0x8000 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; +enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR }; +enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR }; struct msr_counter { unsigned int msr_num; @@ -127,6 +133,9 @@ struct msr_counter bic[] = { { 0x0, "IPC", "", 0, 0, 0, NULL, 0 }, { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 }, { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -185,11 +194,14 @@ struct msr_counter bic[] = { #define BIC_IPC (1ULL << 52) #define BIC_CORE_THROT_CNT (1ULL << 53) #define BIC_UNCORE_MHZ (1ULL << 54) +#define BIC_SAM_mc6 (1ULL << 55) +#define BIC_SAMMHz (1ULL << 56) +#define BIC_SAMACTMHz (1ULL << 57) #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) -#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX) +#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) +#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6) #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -204,10 +216,13 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) +struct amperf_group_fd; + char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; +struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */ struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; @@ -242,11 +257,8 @@ char *output_buffer, *outp; unsigned int do_dts; unsigned int do_ptm; unsigned int do_ipc; -unsigned long long gfx_cur_rc6_ms; unsigned long long cpuidle_cur_cpu_lpi_us; unsigned long long cpuidle_cur_sys_lpi_us; -unsigned int gfx_cur_mhz; -unsigned int gfx_act_mhz; unsigned int tj_max; unsigned int tj_max_override; double rapl_power_units, rapl_time_units; @@ -263,6 +275,28 @@ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ unsigned int first_counter_read = 1; int ignore_stdin; +bool no_msr; +bool no_perf; +enum amperf_source amperf_source; + +enum gfx_sysfs_idx { + GFX_rc6, + GFX_MHz, + GFX_ACTMHz, + SAM_mc6, + SAM_MHz, + SAM_ACTMHz, + GFX_MAX +}; + +struct gfx_sysfs_info { + const char *path; + FILE *fp; + unsigned int val; + unsigned long long val_ull; +}; + +static struct gfx_sysfs_info gfx_info[GFX_MAX]; int get_msr(int cpu, off_t offset, unsigned long long *msr); @@ -652,6 +686,7 @@ static const struct platform_features icx_features = { .bclk_freq = BCLK_100MHZ, .supported_cstates = CC1 | CC6 | PC2 | PC6, .cst_limit = CST_LIMIT_ICX, + .has_msr_core_c1_res = 1, .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, @@ -948,6 +983,175 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi #define MAX_ADDED_THREAD_COUNTERS 24 #define BITMASK_SIZE 32 +/* Indexes used to map data read from perf and MSRs into global variables */ +enum rapl_rci_index { + RAPL_RCI_INDEX_ENERGY_PKG = 0, + RAPL_RCI_INDEX_ENERGY_CORES = 1, + RAPL_RCI_INDEX_DRAM = 2, + RAPL_RCI_INDEX_GFX = 3, + RAPL_RCI_INDEX_PKG_PERF_STATUS = 4, + RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5, + RAPL_RCI_INDEX_CORE_ENERGY = 6, + NUM_RAPL_COUNTERS, +}; + +enum rapl_unit { + RAPL_UNIT_INVALID, + RAPL_UNIT_JOULES, + RAPL_UNIT_WATTS, +}; + +struct rapl_counter_info_t { + unsigned long long data[NUM_RAPL_COUNTERS]; + enum rapl_source source[NUM_RAPL_COUNTERS]; + unsigned long long flags[NUM_RAPL_COUNTERS]; + double scale[NUM_RAPL_COUNTERS]; + enum rapl_unit unit[NUM_RAPL_COUNTERS]; + + union { + /* Active when source == RAPL_SOURCE_MSR */ + struct { + unsigned long long msr[NUM_RAPL_COUNTERS]; + unsigned long long msr_mask[NUM_RAPL_COUNTERS]; + int msr_shift[NUM_RAPL_COUNTERS]; + }; + }; + + int fd_perf; +}; + +/* struct rapl_counter_info_t for each RAPL domain */ +struct rapl_counter_info_t *rapl_counter_info_perdomain; + +#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1) + +struct rapl_counter_arch_info { + int feature_mask; /* Mask for testing if the counter is supported on host */ + const char *perf_subsys; + const char *perf_name; + unsigned long long msr; + unsigned long long msr_mask; + int msr_shift; /* Positive mean shift right, negative mean shift left */ + double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */ + unsigned int rci_index; /* Maps data from perf counters to global variables */ + unsigned long long bic; + double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */ + unsigned long long flags; +}; + +static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { + { + .feature_mask = RAPL_PKG, + .perf_subsys = "power", + .perf_name = "energy-pkg", + .msr = MSR_PKG_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, + .bic = BIC_PkgWatt | BIC_Pkg_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_AMD_F17H, + .perf_subsys = "power", + .perf_name = "energy-pkg", + .msr = MSR_PKG_ENERGY_STAT, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, + .bic = BIC_PkgWatt | BIC_Pkg_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_CORE_ENERGY_STATUS, + .perf_subsys = "power", + .perf_name = "energy-cores", + .msr = MSR_PP0_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, + .bic = BIC_CorWatt | BIC_Cor_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_DRAM, + .perf_subsys = "power", + .perf_name = "energy-ram", + .msr = MSR_DRAM_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_dram_energy_units, + .rci_index = RAPL_RCI_INDEX_DRAM, + .bic = BIC_RAMWatt | BIC_RAM_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_GFX, + .perf_subsys = "power", + .perf_name = "energy-gpu", + .msr = MSR_PP1_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_GFX, + .bic = BIC_GFXWatt | BIC_GFX_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_PKG_PERF_STATUS, + .perf_subsys = NULL, + .perf_name = NULL, + .msr = MSR_PKG_PERF_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_time_units, + .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS, + .bic = BIC_PKG__, + .compat_scale = 100.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_DRAM_PERF_STATUS, + .perf_subsys = NULL, + .perf_name = NULL, + .msr = MSR_DRAM_PERF_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_time_units, + .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS, + .bic = BIC_RAM__, + .compat_scale = 100.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_AMD_F17H, + .perf_subsys = NULL, + .perf_name = NULL, + .msr = MSR_CORE_ENERGY_STAT, + .msr_mask = 0xFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, + .bic = BIC_CorWatt | BIC_Cor_J, + .compat_scale = 1.0, + .flags = 0, + }, +}; + +struct rapl_counter { + unsigned long long raw_value; + enum rapl_unit unit; + double scale; +}; + struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -974,7 +1178,7 @@ struct core_data { unsigned long long c7; unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; - unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */ + struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */ unsigned int core_id; unsigned long long core_throt_cnt; unsigned long long counter[MAX_ADDED_COUNTERS]; @@ -989,8 +1193,8 @@ struct pkg_data { unsigned long long pc8; unsigned long long pc9; unsigned long long pc10; - unsigned long long cpu_lpi; - unsigned long long sys_lpi; + long long cpu_lpi; + long long sys_lpi; unsigned long long pkg_wtd_core_c0; unsigned long long pkg_any_core_c0; unsigned long long pkg_any_gfxe_c0; @@ -998,13 +1202,16 @@ struct pkg_data { long long gfx_rc6_ms; unsigned int gfx_mhz; unsigned int gfx_act_mhz; + long long sam_mc6_ms; + unsigned int sam_mhz; + unsigned int sam_act_mhz; unsigned int package_id; - unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ - unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ - unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */ - unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */ - unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ - unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ + struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */ + struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */ + struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */ + struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */ + struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ + struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned int uncore_mhz; unsigned long long counter[MAX_ADDED_COUNTERS]; @@ -1150,6 +1357,38 @@ struct sys_counters { struct msr_counter *pp; } sys; +void free_sys_counters(void) +{ + struct msr_counter *p = sys.tp, *pnext = NULL; + + while (p) { + pnext = p->next; + free(p); + p = pnext; + } + + p = sys.cp, pnext = NULL; + while (p) { + pnext = p->next; + free(p); + p = pnext; + } + + p = sys.pp, pnext = NULL; + while (p) { + pnext = p->next; + free(p); + p = pnext; + } + + sys.added_thread_counters = 0; + sys.added_core_counters = 0; + sys.added_package_counters = 0; + sys.tp = NULL; + sys.cp = NULL; + sys.pp = NULL; +} + struct system_summary { struct thread_data threads; struct core_data cores; @@ -1280,34 +1519,60 @@ int get_msr_fd(int cpu) sprintf(pathname, "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) - err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " + "or run with --no-msr, or run as root", pathname); fd_percpu[cpu] = fd; return fd; } +static void bic_disable_msr_access(void) +{ + const unsigned long bic_msrs = + BIC_SMI | + BIC_CPU_c1 | + BIC_CPU_c3 | + BIC_CPU_c6 | + BIC_CPU_c7 | + BIC_Mod_c6 | + BIC_CoreTmp | + BIC_Totl_c0 | + BIC_Any_c0 | + BIC_GFX_c0 | + BIC_CPUGFX | + BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp; + + bic_enabled &= ~bic_msrs; + + free_sys_counters(); +} + static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { + assert(!no_perf); + return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); } -static int perf_instr_count_open(int cpu_num) +static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) { - struct perf_event_attr pea; - int fd; + struct perf_event_attr attr; + const pid_t pid = -1; + const unsigned long flags = 0; - memset(&pea, 0, sizeof(struct perf_event_attr)); - pea.type = PERF_TYPE_HARDWARE; - pea.size = sizeof(struct perf_event_attr); - pea.config = PERF_COUNT_HW_INSTRUCTIONS; + assert(!no_perf); - /* counter for cpu_num, including user + kernel and all processes */ - fd = perf_event_open(&pea, -1, cpu_num, -1, 0); - if (fd == -1) { - warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname); - BIC_NOT_PRESENT(BIC_IPC); - } + memset(&attr, 0, sizeof(struct perf_event_attr)); + + attr.type = type; + attr.size = sizeof(struct perf_event_attr); + attr.config = config; + attr.disabled = 0; + attr.sample_type = PERF_SAMPLE_IDENTIFIER; + attr.read_format = read_format; + + const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); return fd; } @@ -1317,7 +1582,7 @@ int get_instr_count_fd(int cpu) if (fd_instr_count_percpu[cpu]) return fd_instr_count_percpu[cpu]; - fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu); + fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); return fd_instr_count_percpu[cpu]; } @@ -1326,6 +1591,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) { ssize_t retval; + assert(!no_msr); + retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); if (retval != sizeof *msr) @@ -1334,6 +1601,21 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) return 0; } +int probe_msr(int cpu, off_t offset) +{ + ssize_t retval; + unsigned long long dummy; + + assert(!no_msr); + + retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset); + + if (retval != sizeof(dummy)) + return 1; + + return 0; +} + #define MAX_DEFERRED 16 char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; @@ -1369,6 +1651,8 @@ void help(void) " Override default 5-second measurement interval\n" " -J, --Joules displays energy in Joules instead of Watts\n" " -l, --list list column headers only\n" + " -M, --no-msr Disable all uses of the MSR driver\n" + " -P, --no-perf Disable all uses of the perf API\n" " -n, --num_iterations num\n" " number of the measurement iterations\n" " -N, --header_iterations num\n" @@ -1573,6 +1857,15 @@ void print_header(char *delim) if (DO_BIC(BIC_GFXACTMHz)) outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_SAM_mc6)) + outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : "")); + + if (DO_BIC(BIC_SAMMHz)) + outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : "")); + + if (DO_BIC(BIC_SAMACTMHz)) + outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); if (DO_BIC(BIC_Any_c0)) @@ -1671,26 +1964,35 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "SMI: %d\n", t->smi_count); for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]); + outp += + sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, + t->counter[i], mp->path); } } - if (c) { + if (c && is_cpu_first_thread_in_core(t, c, p)) { outp += sprintf(outp, "core: %d\n", c->core_id); outp += sprintf(outp, "c3: %016llX\n", c->c3); outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); - outp += sprintf(outp, "Joules: %0X\n", c->core_energy); + + const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale; + const double energy_scale = c->core_energy.scale; + + if (c->core_energy.unit == RAPL_UNIT_JOULES) + outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]); + outp += + sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, + c->counter[i], mp->path); } outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } - if (p) { + if (p && is_cpu_first_core_in_package(t, c, p)) { outp += sprintf(outp, "package: %d\n", p->package_id); outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); @@ -1710,16 +2012,18 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "pc10: %016llX\n", p->pc10); outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); - outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg); - outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores); - outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx); - outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram); - outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status); - outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status); + outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value); + outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); + outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); + outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); + outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); + outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]); + outp += + sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, + p->counter[i], mp->path); } } @@ -1728,6 +2032,23 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; } +double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval) +{ + assert(desired_unit != RAPL_UNIT_INVALID); + + /* + * For now we don't expect anything other than joules, + * so just simplify the logic. + */ + assert(c->unit == RAPL_UNIT_JOULES); + + const double scaled = c->raw_value * c->scale; + + if (desired_unit == RAPL_UNIT_WATTS) + return scaled / interval; + return scaled; +} + /* * column formatting convention & formats */ @@ -1921,9 +2242,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); + sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); /* print per-package data only for 1st core in package */ if (!is_cpu_first_core_in_package(t, c, p)) @@ -1951,6 +2274,24 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_GFXACTMHz)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); + /* SAMmc6 */ + if (DO_BIC(BIC_SAM_mc6)) { + if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ + outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); + } else { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + p->sam_mc6_ms / 10.0 / interval_float); + } + } + + /* SAMMHz */ + if (DO_BIC(BIC_SAMMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz); + + /* SAMACTMHz */ + if (DO_BIC(BIC_SAMACTMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); @@ -1976,43 +2317,59 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); - if (DO_BIC(BIC_CPU_LPI)) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); - if (DO_BIC(BIC_SYS_LPI)) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); + if (DO_BIC(BIC_CPU_LPI)) { + if (p->cpu_lpi >= 0) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + 100.0 * p->cpu_lpi / 1000000.0 / interval_float); + else + outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); + } + if (DO_BIC(BIC_SYS_LPI)) { + if (p->sys_lpi >= 0) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + 100.0 * p->sys_lpi / 1000000.0 / interval_float); + else + outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); + } if (DO_BIC(BIC_PkgWatt)) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); - + sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); + sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_GFXWatt)) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); + sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_RAMWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - p->energy_dram * rapl_dram_energy_units / interval_float); + rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_PKG__)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_RAM__)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); /* UncMHz */ if (DO_BIC(BIC_UNCORE_MHZ)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); @@ -2121,12 +2478,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->gfx_mhz = new->gfx_mhz; old->gfx_act_mhz = new->gfx_act_mhz; - old->energy_pkg = new->energy_pkg - old->energy_pkg; - old->energy_cores = new->energy_cores - old->energy_cores; - old->energy_gfx = new->energy_gfx - old->energy_gfx; - old->energy_dram = new->energy_dram - old->energy_dram; - old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status; - old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status; + /* flag an error when mc6 counter resets/wraps */ + if (old->sam_mc6_ms > new->sam_mc6_ms) + old->sam_mc6_ms = -1; + else + old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms; + + old->sam_mhz = new->sam_mhz; + old->sam_act_mhz = new->sam_act_mhz; + + old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value; + old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value; + old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; + old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; + old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; + old->rapl_dram_perf_status.raw_value = + new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -2150,7 +2517,7 @@ void delta_core(struct core_data *new, struct core_data *old) old->core_throt_cnt = new->core_throt_cnt; old->mc6_us = new->mc6_us - old->mc6_us; - DELTA_WRAP32(new->core_energy, old->core_energy); + DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -2277,6 +2644,13 @@ int delta_cpu(struct thread_data *t, struct core_data *c, return retval; } +void rapl_counter_clear(struct rapl_counter *c) +{ + c->raw_value = 0; + c->scale = 0.0; + c->unit = RAPL_UNIT_INVALID; +} + void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; @@ -2304,7 +2678,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c7 = 0; c->mc6_us = 0; c->core_temp_c = 0; - c->core_energy = 0; + rapl_counter_clear(&c->core_energy); c->core_throt_cnt = 0; p->pkg_wtd_core_c0 = 0; @@ -2325,18 +2699,21 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->cpu_lpi = 0; p->sys_lpi = 0; - p->energy_pkg = 0; - p->energy_dram = 0; - p->energy_cores = 0; - p->energy_gfx = 0; - p->rapl_pkg_perf_status = 0; - p->rapl_dram_perf_status = 0; + rapl_counter_clear(&p->energy_pkg); + rapl_counter_clear(&p->energy_dram); + rapl_counter_clear(&p->energy_cores); + rapl_counter_clear(&p->energy_gfx); + rapl_counter_clear(&p->rapl_pkg_perf_status); + rapl_counter_clear(&p->rapl_dram_perf_status); p->pkg_temp_c = 0; p->gfx_rc6_ms = 0; p->uncore_mhz = 0; p->gfx_mhz = 0; p->gfx_act_mhz = 0; + p->sam_mc6_ms = 0; + p->sam_mhz = 0; + p->sam_act_mhz = 0; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) t->counter[i] = 0; @@ -2347,6 +2724,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->counter[i] = 0; } +void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) +{ + /* Copy unit and scale from src if dst is not initialized */ + if (dst->unit == RAPL_UNIT_INVALID) { + dst->unit = src->unit; + dst->scale = src->scale; + } + + assert(dst->unit == src->unit); + assert(dst->scale == src->scale); + + dst->raw_value += src->raw_value; +} + int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; @@ -2393,7 +2784,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); - average.cores.core_energy += c->core_energy; + rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -2428,25 +2819,29 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.packages.cpu_lpi = p->cpu_lpi; average.packages.sys_lpi = p->sys_lpi; - average.packages.energy_pkg += p->energy_pkg; - average.packages.energy_dram += p->energy_dram; - average.packages.energy_cores += p->energy_cores; - average.packages.energy_gfx += p->energy_gfx; + rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg); + rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram); + rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores); + rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx); average.packages.gfx_rc6_ms = p->gfx_rc6_ms; average.packages.uncore_mhz = p->uncore_mhz; average.packages.gfx_mhz = p->gfx_mhz; average.packages.gfx_act_mhz = p->gfx_act_mhz; + average.packages.sam_mc6_ms = p->sam_mc6_ms; + average.packages.sam_mhz = p->sam_mhz; + average.packages.sam_act_mhz = p->sam_act_mhz; average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); - average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; - average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; + rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status); + rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) - continue; - average.packages.counter[i] += p->counter[i]; + if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) + average.packages.counter[i] = p->counter[i]; + else + average.packages.counter[i] += p->counter[i]; } return 0; } @@ -2578,6 +2973,7 @@ unsigned long long snapshot_sysfs_counter(char *path) int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) { if (mp->msr_num != 0) { + assert(!no_msr); if (get_msr(cpu, mp->msr_num, counterp)) return -1; } else { @@ -2599,7 +2995,7 @@ unsigned long long get_uncore_mhz(int package, int die) { char path[128]; - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package, + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, die); return (snapshot_sysfs_counter(path) / 1000); @@ -2627,6 +3023,9 @@ int get_epb(int cpu) return epb; msr_fallback: + if (no_msr) + return -1; + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); return msr & 0xf; @@ -2700,6 +3099,351 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt) return 0; } +struct amperf_group_fd { + int aperf; /* Also the group descriptor */ + int mperf; +}; + +static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) +{ + int fdmt; + int bytes_read; + char buf[64]; + int ret = -1; + + fdmt = open(path, O_RDONLY, 0); + if (fdmt == -1) { + if (debug) + fprintf(stderr, "Failed to parse perf counter info %s\n", path); + ret = -1; + goto cleanup_and_exit; + } + + bytes_read = read(fdmt, buf, sizeof(buf) - 1); + if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) { + if (debug) + fprintf(stderr, "Failed to parse perf counter info %s\n", path); + ret = -1; + goto cleanup_and_exit; + } + + buf[bytes_read] = '\0'; + + if (sscanf(buf, parse_format, value_ptr) != 1) { + if (debug) + fprintf(stderr, "Failed to parse perf counter info %s\n", path); + ret = -1; + goto cleanup_and_exit; + } + + ret = 0; + +cleanup_and_exit: + close(fdmt); + return ret; +} + +static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format) +{ + unsigned int v; + int status; + + status = read_perf_counter_info(path, parse_format, &v); + if (status) + v = -1; + + return v; +} + +static unsigned int read_msr_type(void) +{ + const char *const path = "/sys/bus/event_source/devices/msr/type"; + const char *const format = "%u"; + + return read_perf_counter_info_n(path, format); +} + +static unsigned int read_aperf_config(void) +{ + const char *const path = "/sys/bus/event_source/devices/msr/events/aperf"; + const char *const format = "event=%x"; + + return read_perf_counter_info_n(path, format); +} + +static unsigned int read_mperf_config(void) +{ + const char *const path = "/sys/bus/event_source/devices/msr/events/mperf"; + const char *const format = "event=%x"; + + return read_perf_counter_info_n(path, format); +} + +static unsigned int read_perf_type(const char *subsys) +{ + const char *const path_format = "/sys/bus/event_source/devices/%s/type"; + const char *const format = "%u"; + char path[128]; + + snprintf(path, sizeof(path), path_format, subsys); + + return read_perf_counter_info_n(path, format); +} + +static unsigned int read_rapl_config(const char *subsys, const char *event_name) +{ + const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; + const char *const format = "event=%x"; + char path[128]; + + snprintf(path, sizeof(path), path_format, subsys, event_name); + + return read_perf_counter_info_n(path, format); +} + +static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) +{ + const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit"; + const char *const format = "%s"; + char path[128]; + char unit_buffer[16]; + + snprintf(path, sizeof(path), path_format, subsys, event_name); + + read_perf_counter_info(path, format, &unit_buffer); + if (strcmp("Joules", unit_buffer) == 0) + return RAPL_UNIT_JOULES; + + return RAPL_UNIT_INVALID; +} + +static double read_perf_rapl_scale(const char *subsys, const char *event_name) +{ + const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; + const char *const format = "%lf"; + char path[128]; + double scale; + + snprintf(path, sizeof(path), path_format, subsys, event_name); + + if (read_perf_counter_info(path, format, &scale)) + return 0.0; + + return scale; +} + +static struct amperf_group_fd open_amperf_fd(int cpu) +{ + const unsigned int msr_type = read_msr_type(); + const unsigned int aperf_config = read_aperf_config(); + const unsigned int mperf_config = read_mperf_config(); + struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 }; + + fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP); + fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP); + + return fds; +} + +static int get_amperf_fd(int cpu) +{ + assert(fd_amperf_percpu); + + if (fd_amperf_percpu[cpu].aperf) + return fd_amperf_percpu[cpu].aperf; + + fd_amperf_percpu[cpu] = open_amperf_fd(cpu); + + return fd_amperf_percpu[cpu].aperf; +} + +/* Read APERF, MPERF and TSC using the perf API. */ +static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu) +{ + union { + struct { + unsigned long nr_entries; + unsigned long aperf; + unsigned long mperf; + }; + + unsigned long as_array[3]; + } cnt; + + const int fd_amperf = get_amperf_fd(cpu); + + /* + * Read the TSC with rdtsc, because we want the absolute value and not + * the offset from the start of the counter. + */ + t->tsc = rdtsc(); + + const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array)); + + if (n != sizeof(cnt.as_array)) + return -2; + + t->aperf = cnt.aperf * aperf_mperf_multiplier; + t->mperf = cnt.mperf * aperf_mperf_multiplier; + + return 0; +} + +/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */ +static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu) +{ + unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; + int aperf_mperf_retry_count = 0; + + /* + * The TSC, APERF and MPERF must be read together for + * APERF/MPERF and MPERF/TSC to give accurate results. + * + * Unfortunately, APERF and MPERF are read by + * individual system call, so delays may occur + * between them. If the time to read them + * varies by a large amount, we re-read them. + */ + + /* + * This initial dummy APERF read has been seen to + * reduce jitter in the subsequent reads. + */ + + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) + return -3; + +retry: + t->tsc = rdtsc(); /* re-read close to APERF */ + + tsc_before = t->tsc; + + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) + return -3; + + tsc_between = rdtsc(); + + if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) + return -4; + + tsc_after = rdtsc(); + + aperf_time = tsc_between - tsc_before; + mperf_time = tsc_after - tsc_between; + + /* + * If the system call latency to read APERF and MPERF + * differ by more than 2x, then try again. + */ + if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { + aperf_mperf_retry_count++; + if (aperf_mperf_retry_count < 5) + goto retry; + else + warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); + } + aperf_mperf_retry_count = 0; + + t->aperf = t->aperf * aperf_mperf_multiplier; + t->mperf = t->mperf * aperf_mperf_multiplier; + + return 0; +} + +size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) +{ + size_t ret = 0; + + for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) + if (rci->source[i] == RAPL_SOURCE_PERF) + ++ret; + + return ret; +} + +void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) +{ + rc->raw_value = rci->data[idx]; + rc->unit = rci->unit[idx]; + rc->scale = rci->scale[idx]; +} + +int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p) +{ + unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; + struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain]; + + if (debug) + fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); + + assert(rapl_counter_info_perdomain); + + /* + * If we have any perf counters to read, read them all now, in bulk + */ + if (rci->fd_perf != -1) { + size_t num_perf_counters = rapl_counter_info_count_perf(rci); + const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); + const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); + + if (actual_read_size != expected_read_size) + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, + actual_read_size); + } + + for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { + switch (rci->source[i]) { + case RAPL_SOURCE_NONE: + break; + + case RAPL_SOURCE_PERF: + assert(pi < ARRAY_SIZE(perf_data)); + assert(rci->fd_perf != -1); + + if (debug) + fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", + i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); + + rci->data[i] = perf_data[pi]; + + ++pi; + break; + + case RAPL_SOURCE_MSR: + if (debug) + fprintf(stderr, "Reading rapl counter via msr at %u\n", i); + + assert(!no_msr); + if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) { + if (get_msr_sum(cpu, rci->msr[i], &rci->data[i])) + return -13 - i; + } else { + if (get_msr(cpu, rci->msr[i], &rci->data[i])) + return -13 - i; + } + + rci->data[i] &= rci->msr_mask[i]; + if (rci->msr_shift[i] >= 0) + rci->data[i] >>= abs(rci->msr_shift[i]); + else + rci->data[i] <<= abs(rci->msr_shift[i]); + + break; + } + } + + _Static_assert(NUM_RAPL_COUNTERS == 7); + write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); + write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); + write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); + write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX); + write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); + write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); + write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); + + return 0; +} + /* * get_counters(...) * migrate to cpu @@ -2709,12 +3453,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int cpu = t->cpu_id; unsigned long long msr; - int aperf_mperf_retry_count = 0; struct msr_counter *mp; int i; + int status; if (cpu_migrate(cpu)) { - fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu); + fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu); return -1; } @@ -2722,63 +3466,26 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (first_counter_read) get_apic_id(t); -retry: + t->tsc = rdtsc(); /* we are running on local CPU of interest */ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) || soft_c1_residency_display(BIC_Avg_MHz)) { - unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; + int status = -1; - /* - * The TSC, APERF and MPERF must be read together for - * APERF/MPERF and MPERF/TSC to give accurate results. - * - * Unfortunately, APERF and MPERF are read by - * individual system call, so delays may occur - * between them. If the time to read them - * varies by a large amount, we re-read them. - */ + assert(!no_perf || !no_msr); - /* - * This initial dummy APERF read has been seen to - * reduce jitter in the subsequent reads. - */ - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - - t->tsc = rdtsc(); /* re-read close to APERF */ - - tsc_before = t->tsc; - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - - tsc_between = rdtsc(); - - if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) - return -4; - - tsc_after = rdtsc(); - - aperf_time = tsc_between - tsc_before; - mperf_time = tsc_after - tsc_between; - - /* - * If the system call latency to read APERF and MPERF - * differ by more than 2x, then try again. - */ - if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { - aperf_mperf_retry_count++; - if (aperf_mperf_retry_count < 5) - goto retry; - else - warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); + switch (amperf_source) { + case AMPERF_SOURCE_PERF: + status = read_aperf_mperf_tsc_perf(t, cpu); + break; + case AMPERF_SOURCE_MSR: + status = read_aperf_mperf_tsc_msr(t, cpu); + break; } - aperf_mperf_retry_count = 0; - t->aperf = t->aperf * aperf_mperf_multiplier; - t->mperf = t->mperf * aperf_mperf_multiplier; + if (status != 0) + return status; } if (DO_BIC(BIC_IPC)) @@ -2806,6 +3513,12 @@ retry: if (!is_cpu_first_thread_in_core(t, c, p)) goto done; + if (platform->has_per_core_rapl) { + status = get_rapl_counters(cpu, c->core_id, c, p); + if (status != 0) + return status; + } + if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; @@ -2846,12 +3559,6 @@ retry: if (DO_BIC(BIC_CORE_THROT_CNT)) get_core_throt_cnt(cpu, &c->core_throt_cnt); - if (platform->rapl_msrs & RAPL_AMD_F17H) { - if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) - return -14; - c->core_energy = msr & 0xFFFFFFFF; - } - for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &c->counter[i])) return -10; @@ -2911,59 +3618,39 @@ retry: if (DO_BIC(BIC_SYS_LPI)) p->sys_lpi = cpuidle_cur_sys_lpi_us; - if (platform->rapl_msrs & RAPL_PKG) { - if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr)) - return -13; - p->energy_pkg = msr; - } - if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) { - if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr)) - return -14; - p->energy_cores = msr; - } - if (platform->rapl_msrs & RAPL_DRAM) { - if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) - return -15; - p->energy_dram = msr; - } - if (platform->rapl_msrs & RAPL_GFX) { - if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr)) - return -16; - p->energy_gfx = msr; - } - if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) { - if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr)) - return -16; - p->rapl_pkg_perf_status = msr; - } - if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) { - if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr)) - return -16; - p->rapl_dram_perf_status = msr; - } - if (platform->rapl_msrs & RAPL_AMD_F17H) { - if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr)) - return -13; - p->energy_pkg = msr; + if (!platform->has_per_core_rapl) { + status = get_rapl_counters(cpu, p->package_id, c, p); + if (status != 0) + return status; } + if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); } - if (DO_BIC(BIC_GFX_rc6)) - p->gfx_rc6_ms = gfx_cur_rc6_ms; - /* n.b. assume die0 uncore frequency applies to whole package */ if (DO_BIC(BIC_UNCORE_MHZ)) p->uncore_mhz = get_uncore_mhz(p->package_id, 0); + if (DO_BIC(BIC_GFX_rc6)) + p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull; + if (DO_BIC(BIC_GFXMHz)) - p->gfx_mhz = gfx_cur_mhz; + p->gfx_mhz = gfx_info[GFX_MHz].val; if (DO_BIC(BIC_GFXACTMHz)) - p->gfx_act_mhz = gfx_act_mhz; + p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val; + + if (DO_BIC(BIC_SAM_mc6)) + p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull; + + if (DO_BIC(BIC_SAMMHz)) + p->sam_mhz = gfx_info[SAM_MHz].val; + + if (DO_BIC(BIC_SAMACTMHz)) + p->sam_act_mhz = gfx_info[SAM_ACTMHz].val; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &p->counter[i])) @@ -3053,7 +3740,7 @@ void probe_cst_limit(void) unsigned long long msr; int *pkg_cstate_limits; - if (!platform->has_nhm_msrs) + if (!platform->has_nhm_msrs || no_msr) return; switch (platform->cst_limit) { @@ -3097,7 +3784,7 @@ static void dump_platform_info(void) unsigned long long msr; unsigned int ratio; - if (!platform->has_nhm_msrs) + if (!platform->has_nhm_msrs || no_msr) return; get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); @@ -3115,7 +3802,7 @@ static void dump_power_ctl(void) { unsigned long long msr; - if (!platform->has_nhm_msrs) + if (!platform->has_nhm_msrs || no_msr) return; get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); @@ -3321,7 +4008,7 @@ static void dump_cst_cfg(void) { unsigned long long msr; - if (!platform->has_nhm_msrs) + if (!platform->has_nhm_msrs || no_msr) return; get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); @@ -3393,7 +4080,7 @@ void print_irtl(void) { unsigned long long msr; - if (!platform->has_irtl_msrs) + if (!platform->has_irtl_msrs || no_msr) return; if (platform->supported_cstates & PC3) { @@ -3443,12 +4130,64 @@ void free_fd_percpu(void) { int i; + if (!fd_percpu) + return; + for (i = 0; i < topo.max_cpu_num + 1; ++i) { if (fd_percpu[i] != 0) close(fd_percpu[i]); } free(fd_percpu); + fd_percpu = NULL; +} + +void free_fd_amperf_percpu(void) +{ + int i; + + if (!fd_amperf_percpu) + return; + + for (i = 0; i < topo.max_cpu_num + 1; ++i) { + if (fd_amperf_percpu[i].mperf != 0) + close(fd_amperf_percpu[i].mperf); + + if (fd_amperf_percpu[i].aperf != 0) + close(fd_amperf_percpu[i].aperf); + } + + free(fd_amperf_percpu); + fd_amperf_percpu = NULL; +} + +void free_fd_instr_count_percpu(void) +{ + if (!fd_instr_count_percpu) + return; + + for (int i = 0; i < topo.max_cpu_num + 1; ++i) { + if (fd_instr_count_percpu[i] != 0) + close(fd_instr_count_percpu[i]); + } + + free(fd_instr_count_percpu); + fd_instr_count_percpu = NULL; +} + +void free_fd_rapl_percpu(void) +{ + if (!rapl_counter_info_perdomain) + return; + + const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages; + + for (int domain_id = 0; domain_id < num_domains; ++domain_id) { + if (rapl_counter_info_perdomain[domain_id].fd_perf != -1) + close(rapl_counter_info_perdomain[domain_id].fd_perf); + } + + free(rapl_counter_info_perdomain); } void free_all_buffers(void) @@ -3492,6 +4231,9 @@ void free_all_buffers(void) outp = NULL; free_fd_percpu(); + free_fd_instr_count_percpu(); + free_fd_amperf_percpu(); + free_fd_rapl_percpu(); free(irq_column_2_cpu); free(irqs_per_cpu); @@ -3825,11 +4567,17 @@ static void update_effective_set(bool startup) err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str); } +void linux_perf_init(void); +void rapl_perf_init(void); + void re_initialize(void) { free_all_buffers(); setup_all_buffers(false); - fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); + linux_perf_init(); + rapl_perf_init(); + fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, + topo.allowed_cpus); } void set_max_cpu_num(void) @@ -3940,85 +4688,43 @@ int snapshot_proc_interrupts(void) } /* - * snapshot_gfx_rc6_ms() + * snapshot_graphics() * - * record snapshot of - * /sys/class/drm/card0/power/rc6_residency_ms + * record snapshot of specified graphics sysfs knob * * return 1 if config change requires a restart, else return 0 */ -int snapshot_gfx_rc6_ms(void) +int snapshot_graphics(int idx) { FILE *fp; int retval; - fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r"); - - retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms); - if (retval != 1) - err(1, "GFX rc6"); - - fclose(fp); - - return 0; -} - -/* - * snapshot_gfx_mhz() - * - * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz - * when /sys/class/drm/card0/gt_cur_freq_mhz is not available. - * - * return 1 if config change requires a restart, else return 0 - */ -int snapshot_gfx_mhz(void) -{ - static FILE *fp; - int retval; - - if (fp == NULL) { - fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r"); - if (!fp) - fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); - } else { - rewind(fp); - fflush(fp); + switch (idx) { + case GFX_rc6: + case SAM_mc6: + fp = fopen_or_die(gfx_info[idx].path, "r"); + retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull); + if (retval != 1) + err(1, "rc6"); + fclose(fp); + return 0; + case GFX_MHz: + case GFX_ACTMHz: + case SAM_MHz: + case SAM_ACTMHz: + if (gfx_info[idx].fp == NULL) { + gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); + } else { + rewind(gfx_info[idx].fp); + fflush(gfx_info[idx].fp); + } + retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); + if (retval != 1) + err(1, "MHz"); + return 0; + default: + return -EINVAL; } - - retval = fscanf(fp, "%d", &gfx_cur_mhz); - if (retval != 1) - err(1, "GFX MHz"); - - return 0; -} - -/* - * snapshot_gfx_cur_mhz() - * - * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz - * when /sys/class/drm/card0/gt_act_freq_mhz is not available. - * - * return 1 if config change requires a restart, else return 0 - */ -int snapshot_gfx_act_mhz(void) -{ - static FILE *fp; - int retval; - - if (fp == NULL) { - fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r"); - if (!fp) - fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); - } else { - rewind(fp); - fflush(fp); - } - - retval = fscanf(fp, "%d", &gfx_act_mhz); - if (retval != 1) - err(1, "GFX ACT MHz"); - - return 0; } /* @@ -4083,13 +4789,22 @@ int snapshot_proc_sysfs_files(void) return 1; if (DO_BIC(BIC_GFX_rc6)) - snapshot_gfx_rc6_ms(); + snapshot_graphics(GFX_rc6); if (DO_BIC(BIC_GFXMHz)) - snapshot_gfx_mhz(); + snapshot_graphics(GFX_MHz); if (DO_BIC(BIC_GFXACTMHz)) - snapshot_gfx_act_mhz(); + snapshot_graphics(GFX_ACTMHz); + + if (DO_BIC(BIC_SAM_mc6)) + snapshot_graphics(SAM_mc6); + + if (DO_BIC(BIC_SAMMHz)) + snapshot_graphics(SAM_MHz); + + if (DO_BIC(BIC_SAMACTMHz)) + snapshot_graphics(SAM_ACTMHz); if (DO_BIC(BIC_CPU_LPI)) snapshot_cpu_lpi_us(); @@ -4173,6 +4888,8 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) int ret, idx; unsigned long long msr_cur, msr_last; + assert(!no_msr); + if (!per_cpu_msr_sum) return 1; @@ -4201,6 +4918,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg UNUSED(c); UNUSED(p); + assert(!no_msr); + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { unsigned long long msr_cur, msr_last; off_t offset; @@ -4280,7 +4999,8 @@ release_msr: /* * set_my_sched_priority(pri) - * return previous + * return previous priority on success + * return value < -20 on failure */ int set_my_sched_priority(int priority) { @@ -4290,16 +5010,16 @@ int set_my_sched_priority(int priority) errno = 0; original_priority = getpriority(PRIO_PROCESS, 0); if (errno && (original_priority == -1)) - err(errno, "getpriority"); + return -21; retval = setpriority(PRIO_PROCESS, 0, priority); if (retval) - errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname); + return -21; errno = 0; retval = getpriority(PRIO_PROCESS, 0); if (retval != priority) - err(retval, "getpriority(%d) != setpriority(%d)", retval, priority); + return -21; return original_priority; } @@ -4314,6 +5034,9 @@ void turbostat_loop() /* * elevate own priority for interval mode + * + * ignore on error - we probably don't have permission to set it, but + * it's not a big deal */ set_my_sched_priority(-20); @@ -4399,10 +5122,13 @@ void check_dev_msr() struct stat sb; char pathname[32]; + if (no_msr) + return; + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); if (stat(pathname, &sb)) if (system("/sbin/modprobe msr > /dev/null 2>&1")) - err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); + no_msr = 1; } /* @@ -4414,47 +5140,51 @@ int check_for_cap_sys_rawio(void) { cap_t caps; cap_flag_value_t cap_flag_value; + int ret = 0; caps = cap_get_proc(); if (caps == NULL) - err(-6, "cap_get_proc\n"); - - if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) - err(-6, "cap_get\n"); - - if (cap_flag_value != CAP_SET) { - warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname); return 1; + + if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { + ret = 1; + goto free_and_exit; } + if (cap_flag_value != CAP_SET) { + ret = 1; + goto free_and_exit; + } + +free_and_exit: if (cap_free(caps) == -1) err(-6, "cap_free\n"); - return 0; + return ret; } -void check_permissions(void) +void check_msr_permission(void) { - int do_exit = 0; + int failed = 0; char pathname[32]; + if (no_msr) + return; + /* check for CAP_SYS_RAWIO */ - do_exit += check_for_cap_sys_rawio(); + failed += check_for_cap_sys_rawio(); /* test file permissions */ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); if (euidaccess(pathname, R_OK)) { - do_exit++; - warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); + failed++; } - /* if all else fails, thell them to be root */ - if (do_exit) - if (getuid() != 0) - warnx("... or simply run as root"); - - if (do_exit) - exit(-6); + if (failed) { + warnx("Failed to access %s. Some of the counters may not be available\n" + "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr"); + no_msr = 1; + } } void probe_bclk(void) @@ -4462,7 +5192,7 @@ void probe_bclk(void) unsigned long long msr; unsigned int base_ratio; - if (!platform->has_nhm_msrs) + if (!platform->has_nhm_msrs || no_msr) return; if (platform->bclk_freq == BCLK_100MHZ) @@ -4502,7 +5232,7 @@ static void dump_turbo_ratio_info(void) if (!has_turbo) return; - if (!platform->has_nhm_msrs) + if (!platform->has_nhm_msrs || no_msr) return; if (platform->trl_msrs & TRL_LIMIT2) @@ -4567,20 +5297,15 @@ static void dump_sysfs_file(char *path) static void probe_intel_uncore_frequency(void) { int i, j; - char path[128]; + char path[256]; if (!genuine_intel) return; - if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK)) - return; + if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) + goto probe_cluster; - /* Cluster level sysfs not supported yet. */ - if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK)) - return; - - if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) - BIC_PRESENT(BIC_UNCORE_MHZ); + BIC_PRESENT(BIC_UNCORE_MHZ); if (quiet) return; @@ -4588,40 +5313,178 @@ static void probe_intel_uncore_frequency(void) for (i = 0; i < topo.num_packages; ++i) { for (j = 0; j < topo.num_die; ++j) { int k, l; + char path_base[128]; - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz", - i, j); - k = read_sysfs_int(path); - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz", - i, j); - l = read_sysfs_int(path); - fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); + sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, + j); - sprintf(path, - "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz", - i, j); + sprintf(path, "%s/min_freq_khz", path_base); k = read_sysfs_int(path); - sprintf(path, - "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz", - i, j); + sprintf(path, "%s/max_freq_khz", path_base); l = read_sysfs_int(path); - fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000); + fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); + + sprintf(path, "%s/initial_min_freq_khz", path_base); + k = read_sysfs_int(path); + sprintf(path, "%s/initial_max_freq_khz", path_base); + l = read_sysfs_int(path); + fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); + + sprintf(path, "%s/current_freq_khz", path_base); + k = read_sysfs_int(path); + fprintf(outf, " %d MHz\n", k / 1000); } } + return; + +probe_cluster: + if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) + return; + + if (quiet) + return; + + for (i = 0;; ++i) { + int k, l; + char path_base[128]; + int package_id, domain_id, cluster_id; + + sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i); + + if (access(path_base, R_OK)) + break; + + sprintf(path, "%s/package_id", path_base); + package_id = read_sysfs_int(path); + + sprintf(path, "%s/domain_id", path_base); + domain_id = read_sysfs_int(path); + + sprintf(path, "%s/fabric_cluster_id", path_base); + cluster_id = read_sysfs_int(path); + + sprintf(path, "%s/min_freq_khz", path_base); + k = read_sysfs_int(path); + sprintf(path, "%s/max_freq_khz", path_base); + l = read_sysfs_int(path); + fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, + cluster_id, k / 1000, l / 1000); + + sprintf(path, "%s/initial_min_freq_khz", path_base); + k = read_sysfs_int(path); + sprintf(path, "%s/initial_max_freq_khz", path_base); + l = read_sysfs_int(path); + fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); + + sprintf(path, "%s/current_freq_khz", path_base); + k = read_sysfs_int(path); + fprintf(outf, " %d MHz\n", k / 1000); + } } static void probe_graphics(void) { + /* Xe graphics sysfs knobs */ + if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { + FILE *fp; + char buf[8]; + bool gt0_is_gt; + int idx; + + fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); + if (!fp) + goto next; + + if (!fread(buf, sizeof(char), 7, fp)) { + fclose(fp); + goto next; + } + fclose(fp); + + if (!strncmp(buf, "gt0-rc", strlen("gt0-rc"))) + gt0_is_gt = true; + else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc"))) + gt0_is_gt = false; + else + goto next; + + idx = gt0_is_gt ? GFX_rc6 : SAM_mc6; + gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms"; + + idx = gt0_is_gt ? GFX_MHz : SAM_MHz; + if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK)) + gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq"; + + idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz; + if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK)) + gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq"; + + idx = gt0_is_gt ? SAM_mc6 : GFX_rc6; + if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK)) + gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms"; + + idx = gt0_is_gt ? SAM_MHz : GFX_MHz; + if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK)) + gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq"; + + idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz; + if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK)) + gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq"; + + goto end; + } + +next: + /* New i915 graphics sysfs knobs */ + if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) { + gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms"; + + if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK)) + gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz"; + + if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK)) + gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz"; + + if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK)) + gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms"; + + if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK)) + gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz"; + + if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK)) + gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz"; + + goto end; + } + + /* Fall back to traditional i915 graphics sysfs knobs */ if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) + gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms"; + + if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK)) + gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz"; + else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) + gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz"; + + + if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK)) + gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz"; + else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz"; + +end: + if (gfx_info[GFX_rc6].path) BIC_PRESENT(BIC_GFX_rc6); - - if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) || - !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) + if (gfx_info[GFX_MHz].path) BIC_PRESENT(BIC_GFXMHz); - - if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) || - !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + if (gfx_info[GFX_ACTMHz].path) BIC_PRESENT(BIC_GFXACTMHz); + if (gfx_info[SAM_mc6].path) + BIC_PRESENT(BIC_SAM_mc6); + if (gfx_info[SAM_MHz].path) + BIC_PRESENT(BIC_SAMMHz); + if (gfx_info[SAM_ACTMHz].path) + BIC_PRESENT(BIC_SAMACTMHz); } static void dump_sysfs_cstate_config(void) @@ -4783,6 +5646,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) UNUSED(c); UNUSED(p); + if (no_msr) + return 0; + if (!has_hwp) return 0; @@ -4869,6 +5735,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data UNUSED(c); UNUSED(p); + if (no_msr) + return 0; + cpu = t->cpu_id; /* per-package */ @@ -4983,31 +5852,18 @@ void rapl_probe_intel(void) unsigned long long msr; unsigned int time_unit; double tdp; + const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; + const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; - if (rapl_joules) { - if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS) - BIC_PRESENT(BIC_Pkg_J); - if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) - BIC_PRESENT(BIC_Cor_J); - if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS) - BIC_PRESENT(BIC_RAM_J); - if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS) - BIC_PRESENT(BIC_GFX_J); - } else { - if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS) - BIC_PRESENT(BIC_PkgWatt); - if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) - BIC_PRESENT(BIC_CorWatt); - if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS) - BIC_PRESENT(BIC_RAMWatt); - if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS) - BIC_PRESENT(BIC_GFXWatt); - } + if (rapl_joules) + bic_enabled &= ~bic_watt_bits; + else + bic_enabled &= ~bic_joules_bits; - if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) - BIC_PRESENT(BIC_PKG__); - if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) - BIC_PRESENT(BIC_RAM__); + if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) + bic_enabled &= ~BIC_PKG__; + if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) + bic_enabled &= ~BIC_RAM__; /* units on package 0, verify later other packages match */ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) @@ -5041,14 +5897,13 @@ void rapl_probe_amd(void) { unsigned long long msr; double tdp; + const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; + const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - } + if (rapl_joules) + bic_enabled &= ~bic_watt_bits; + else + bic_enabled &= ~bic_joules_bits; if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) return; @@ -5202,7 +6057,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) */ void probe_rapl(void) { - if (!platform->rapl_msrs) + if (!platform->rapl_msrs || no_msr) return; if (genuine_intel) @@ -5258,7 +6113,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk } /* Temperature Target MSR is Nehalem and newer only */ - if (!platform->has_nhm_msrs) + if (!platform->has_nhm_msrs || no_msr) goto guess; if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) @@ -5305,6 +6160,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p UNUSED(c); UNUSED(p); + if (no_msr) + return 0; + if (!(do_dts || do_ptm)) return 0; @@ -5402,6 +6260,9 @@ void decode_feature_control_msr(void) { unsigned long long msr; + if (no_msr) + return; + if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); @@ -5411,6 +6272,9 @@ void decode_misc_enable_msr(void) { unsigned long long msr; + if (no_msr) + return; + if (!genuine_intel) return; @@ -5428,6 +6292,9 @@ void decode_misc_feature_control(void) { unsigned long long msr; + if (no_msr) + return; + if (!platform->has_msr_misc_feature_control) return; @@ -5449,6 +6316,9 @@ void decode_misc_pwr_mgmt_msr(void) { unsigned long long msr; + if (no_msr) + return; + if (!platform->has_msr_misc_pwr_mgmt) return; @@ -5468,6 +6338,9 @@ void decode_c6_demotion_policy_msr(void) { unsigned long long msr; + if (no_msr) + return; + if (!platform->has_msr_c6_demotion_policy_config) return; @@ -5489,7 +6362,8 @@ void print_dev_latency(void) fd = open(path, O_RDONLY); if (fd < 0) { - warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname); + if (debug) + warnx("Read %s failed", path); return; } @@ -5504,23 +6378,260 @@ void print_dev_latency(void) close(fd); } +static int has_instr_count_access(void) +{ + int fd; + int has_access; + + if (no_perf) + return 0; + + fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); + has_access = fd != -1; + + if (fd != -1) + close(fd); + + if (!has_access) + warnx("Failed to access %s. Some of the counters may not be available\n" + "\tRun as root to enable them or use %s to disable the access explicitly", + "instructions retired perf counter", "--no-perf"); + + return has_access; +} + +bool is_aperf_access_required(void) +{ + return BIC_IS_ENABLED(BIC_Avg_MHz) + || BIC_IS_ENABLED(BIC_Busy) + || BIC_IS_ENABLED(BIC_Bzy_MHz) + || BIC_IS_ENABLED(BIC_IPC); +} + +int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, + double *scale_, enum rapl_unit *unit_) +{ + if (no_perf) + return -1; + + const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name); + + if (scale == 0.0) + return -1; + + const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name); + + if (unit == RAPL_UNIT_INVALID) + return -1; + + const unsigned int rapl_type = read_perf_type(cai->perf_subsys); + const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name); + + const int fd_counter = + open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); + if (fd_counter == -1) + return -1; + + /* If it's the first counter opened, make it a group descriptor */ + if (rci->fd_perf == -1) + rci->fd_perf = fd_counter; + + *scale_ = scale; + *unit_ = unit; + return fd_counter; +} + +int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, + double *scale, enum rapl_unit *unit) +{ + int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); + + if (debug) + fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); + + return ret; +} + /* * Linux-perf manages the HW instructions-retired counter * by enabling when requested, and hiding rollover */ void linux_perf_init(void) { - if (!BIC_IS_ENABLED(BIC_IPC)) - return; - if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) return; - fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); - if (fd_instr_count_percpu == NULL) - err(-1, "calloc fd_instr_count_percpu"); + if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { + fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (fd_instr_count_percpu == NULL) + err(-1, "calloc fd_instr_count_percpu"); + } - BIC_PRESENT(BIC_IPC); + const bool aperf_required = is_aperf_access_required(); + + if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) { + fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu)); + if (fd_amperf_percpu == NULL) + err(-1, "calloc fd_amperf_percpu"); + } +} + +void rapl_perf_init(void) +{ + const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages; + bool *domain_visited = calloc(num_domains, sizeof(bool)); + + rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); + if (rapl_counter_info_perdomain == NULL) + err(-1, "calloc rapl_counter_info_percpu"); + + /* + * Initialize rapl_counter_info_percpu + */ + for (int domain_id = 0; domain_id < num_domains; ++domain_id) { + struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id]; + + rci->fd_perf = -1; + for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { + rci->data[i] = 0; + rci->source[i] = RAPL_SOURCE_NONE; + } + } + + /* + * Open/probe the counters + * If can't get it via perf, fallback to MSR + */ + for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) { + + const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i]; + bool has_counter = 0; + double scale; + enum rapl_unit unit; + int next_domain; + + memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); + + for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { + + if (cpu_is_not_allowed(cpu)) + continue; + + /* Skip already seen and handled RAPL domains */ + next_domain = + platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; + + if (domain_visited[next_domain]) + continue; + + domain_visited[next_domain] = 1; + + struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; + + /* Check if the counter is enabled and accessible */ + if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) { + + /* Use perf API for this counter */ + if (!no_perf && cai->perf_name + && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { + rci->source[cai->rci_index] = RAPL_SOURCE_PERF; + rci->scale[cai->rci_index] = scale * cai->compat_scale; + rci->unit[cai->rci_index] = unit; + rci->flags[cai->rci_index] = cai->flags; + + /* Use MSR for this counter */ + } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + rci->source[cai->rci_index] = RAPL_SOURCE_MSR; + rci->msr[cai->rci_index] = cai->msr; + rci->msr_mask[cai->rci_index] = cai->msr_mask; + rci->msr_shift[cai->rci_index] = cai->msr_shift; + rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; + rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; + rci->flags[cai->rci_index] = cai->flags; + } + } + + if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE) + has_counter = 1; + } + + /* If any CPU has access to the counter, make it present */ + if (has_counter) + BIC_PRESENT(cai->bic); + } + + free(domain_visited); +} + +static int has_amperf_access_via_msr(void) +{ + if (no_msr) + return 0; + + if (probe_msr(base_cpu, MSR_IA32_APERF)) + return 0; + + if (probe_msr(base_cpu, MSR_IA32_MPERF)) + return 0; + + return 1; +} + +static int has_amperf_access_via_perf(void) +{ + struct amperf_group_fd fds; + + /* + * Cache the last result, so we don't warn the user multiple times + * + * Negative means cached, no access + * Zero means not cached + * Positive means cached, has access + */ + static int has_access_cached; + + if (no_perf) + return 0; + + if (has_access_cached != 0) + return has_access_cached > 0; + + fds = open_amperf_fd(base_cpu); + has_access_cached = (fds.aperf != -1) && (fds.mperf != -1); + + if (fds.aperf == -1) + warnx("Failed to access %s. Some of the counters may not be available\n" + "\tRun as root to enable them or use %s to disable the access explicitly", + "APERF perf counter", "--no-perf"); + else + close(fds.aperf); + + if (fds.mperf == -1) + warnx("Failed to access %s. Some of the counters may not be available\n" + "\tRun as root to enable them or use %s to disable the access explicitly", + "MPERF perf counter", "--no-perf"); + else + close(fds.mperf); + + if (has_access_cached == 0) + has_access_cached = -1; + + return has_access_cached > 0; +} + +/* Check if we can access APERF and MPERF */ +static int has_amperf_access(void) +{ + if (!is_aperf_access_required()) + return 0; + + if (!no_msr && has_amperf_access_via_msr()) + return 1; + + if (!no_perf && has_amperf_access_via_perf()) + return 1; + + return 0; } void probe_cstates(void) @@ -5563,7 +6674,7 @@ void probe_cstates(void) if (platform->has_msr_module_c6_res_ms) BIC_PRESENT(BIC_Mod_c6); - if (platform->has_ext_cst_msrs) { + if (platform->has_ext_cst_msrs && !no_msr) { BIC_PRESENT(BIC_Totl_c0); BIC_PRESENT(BIC_Any_c0); BIC_PRESENT(BIC_GFX_c0); @@ -5623,6 +6734,7 @@ void process_cpuid() unsigned int eax, ebx, ecx, edx; unsigned int fms, family, model, stepping, ecx_flags, edx_flags; unsigned long long ucode_patch = 0; + bool ucode_patch_valid = false; eax = ebx = ecx = edx = 0; @@ -5650,8 +6762,12 @@ void process_cpuid() ecx_flags = ecx; edx_flags = edx; - if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) - warnx("get_msr(UCODE)"); + if (!no_msr) { + if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) + warnx("get_msr(UCODE)"); + else + ucode_patch_valid = true; + } /* * check max extended function levels of CPUID. @@ -5662,9 +6778,12 @@ void process_cpuid() __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); if (!quiet) { - fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n", - family, model, stepping, family, model, stepping, - (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); + fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", + family, model, stepping, family, model, stepping); + if (ucode_patch_valid) + fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); + fputc('\n', outf); + fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", ecx_flags & (1 << 0) ? "SSE3" : "-", @@ -5700,10 +6819,11 @@ void process_cpuid() __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); - if (has_aperf) { + if (has_aperf && has_amperf_access()) { BIC_PRESENT(BIC_Avg_MHz); BIC_PRESENT(BIC_Busy); BIC_PRESENT(BIC_Bzy_MHz); + BIC_PRESENT(BIC_IPC); } do_dts = eax & (1 << 0); if (do_dts) @@ -5786,6 +6906,15 @@ void process_cpuid() base_mhz = max_mhz = bus_mhz = edx = 0; __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); + + bclk = bus_mhz; + + base_hz = base_mhz * 1000000; + has_base_hz = 1; + + if (platform->enable_tsc_tweak) + tsc_tweak = base_hz / tsc_hz; + if (!quiet) fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); @@ -5814,7 +6943,7 @@ void probe_pm_features(void) probe_thermal(); - if (platform->has_nhm_msrs) + if (platform->has_nhm_msrs && !no_msr) BIC_PRESENT(BIC_SMI); if (!quiet) @@ -6142,6 +7271,7 @@ void topology_update(void) topo.allowed_packages = 0; for_all_cpus(update_topo, ODD_COUNTERS); } + void setup_all_buffers(bool startup) { topology_probe(startup); @@ -6169,21 +7299,129 @@ void set_base_cpu(void) err(-ENODEV, "No valid cpus found"); } +static void set_amperf_source(void) +{ + amperf_source = AMPERF_SOURCE_PERF; + + const bool aperf_required = is_aperf_access_required(); + + if (no_perf || !aperf_required || !has_amperf_access_via_perf()) + amperf_source = AMPERF_SOURCE_MSR; + + if (quiet || !debug) + return; + + fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf"); +} + +bool has_added_counters(void) +{ + /* + * It only makes sense to call this after the command line is parsed, + * otherwise sys structure is not populated. + */ + + return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; +} + +bool is_msr_access_required(void) +{ + if (no_msr) + return false; + + if (has_added_counters()) + return true; + + return BIC_IS_ENABLED(BIC_SMI) + || BIC_IS_ENABLED(BIC_CPU_c1) + || BIC_IS_ENABLED(BIC_CPU_c3) + || BIC_IS_ENABLED(BIC_CPU_c6) + || BIC_IS_ENABLED(BIC_CPU_c7) + || BIC_IS_ENABLED(BIC_Mod_c6) + || BIC_IS_ENABLED(BIC_CoreTmp) + || BIC_IS_ENABLED(BIC_Totl_c0) + || BIC_IS_ENABLED(BIC_Any_c0) + || BIC_IS_ENABLED(BIC_GFX_c0) + || BIC_IS_ENABLED(BIC_CPUGFX) + || BIC_IS_ENABLED(BIC_Pkgpc3) + || BIC_IS_ENABLED(BIC_Pkgpc6) + || BIC_IS_ENABLED(BIC_Pkgpc2) + || BIC_IS_ENABLED(BIC_Pkgpc7) + || BIC_IS_ENABLED(BIC_Pkgpc8) + || BIC_IS_ENABLED(BIC_Pkgpc9) + || BIC_IS_ENABLED(BIC_Pkgpc10) + /* TODO: Multiplex access with perf */ + || BIC_IS_ENABLED(BIC_CorWatt) + || BIC_IS_ENABLED(BIC_Cor_J) + || BIC_IS_ENABLED(BIC_PkgWatt) + || BIC_IS_ENABLED(BIC_CorWatt) + || BIC_IS_ENABLED(BIC_GFXWatt) + || BIC_IS_ENABLED(BIC_RAMWatt) + || BIC_IS_ENABLED(BIC_Pkg_J) + || BIC_IS_ENABLED(BIC_Cor_J) + || BIC_IS_ENABLED(BIC_GFX_J) + || BIC_IS_ENABLED(BIC_RAM_J) + || BIC_IS_ENABLED(BIC_PKG__) + || BIC_IS_ENABLED(BIC_RAM__) + || BIC_IS_ENABLED(BIC_PkgTmp) + || (is_aperf_access_required() && !has_amperf_access_via_perf()); +} + +void check_msr_access(void) +{ + if (!is_msr_access_required()) + no_msr = 1; + + check_dev_msr(); + check_msr_permission(); + + if (no_msr) + bic_disable_msr_access(); +} + +void check_perf_access(void) +{ + const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC); + + if (no_perf || !intrcount_required || !has_instr_count_access()) + bic_enabled &= ~BIC_IPC; + + const bool aperf_required = is_aperf_access_required(); + + if (!aperf_required || !has_amperf_access()) { + bic_enabled &= ~BIC_Avg_MHz; + bic_enabled &= ~BIC_Busy; + bic_enabled &= ~BIC_Bzy_MHz; + bic_enabled &= ~BIC_IPC; + } +} + void turbostat_init() { setup_all_buffers(true); set_base_cpu(); - check_dev_msr(); - check_permissions(); + check_msr_access(); + check_perf_access(); process_cpuid(); probe_pm_features(); + set_amperf_source(); linux_perf_init(); + rapl_perf_init(); for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); if (DO_BIC(BIC_IPC)) (void)get_instr_count_fd(base_cpu); + + /* + * If TSC tweak is needed, but couldn't get it, + * disable more BICs, since it can't be reported accurately. + */ + if (platform->enable_tsc_tweak && !has_base_hz) { + bic_enabled &= ~BIC_Busy; + bic_enabled &= ~BIC_Bzy_MHz; + } } int fork_it(char **argv) @@ -6259,7 +7497,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2023.11.07 - Len Brown \n"); + fprintf(outf, "turbostat version 2024.04.08 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 @@ -6291,6 +7529,9 @@ int add_counter(unsigned int msr_num, char *path, char *name, { struct msr_counter *msrp; + if (no_msr && msr_num) + errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); + msrp = calloc(1, sizeof(struct msr_counter)); if (msrp == NULL) { perror("calloc"); @@ -6595,6 +7836,8 @@ void cmdline(int argc, char **argv) { "list", no_argument, 0, 'l' }, { "out", required_argument, 0, 'o' }, { "quiet", no_argument, 0, 'q' }, + { "no-msr", no_argument, 0, 'M' }, + { "no-perf", no_argument, 0, 'P' }, { "show", required_argument, 0, 's' }, { "Summary", no_argument, 0, 'S' }, { "TCC", required_argument, 0, 'T' }, @@ -6604,7 +7847,25 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) { + /* + * Parse some options early, because they may make other options invalid, + * like adding the MSR counter with --add and at the same time using --no-msr. + */ + while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) { + switch (opt) { + case 'M': + no_msr = 1; + break; + case 'P': + no_perf = 1; + break; + default: + break; + } + } + optind = 0; + + while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': parse_add_command(optarg); @@ -6662,6 +7923,10 @@ void cmdline(int argc, char **argv) case 'q': quiet = 1; break; + case 'M': + case 'P': + /* Parsed earlier */ + break; case 'n': num_iterations = strtod(optarg, NULL); @@ -6704,6 +7969,22 @@ void cmdline(int argc, char **argv) } } +void set_rlimit(void) +{ + struct rlimit limit; + + if (getrlimit(RLIMIT_NOFILE, &limit) < 0) + err(1, "Failed to get rlimit"); + + if (limit.rlim_max < MAX_NOFILE) + limit.rlim_max = MAX_NOFILE; + if (limit.rlim_cur < MAX_NOFILE) + limit.rlim_cur = MAX_NOFILE; + + if (setrlimit(RLIMIT_NOFILE, &limit) < 0) + err(1, "Failed to set rlimit"); +} + int main(int argc, char **argv) { int fd, ret; @@ -6729,9 +8010,13 @@ skip_cgroup_setting: probe_sysfs(); + if (!getuid()) + set_rlimit(); + turbostat_init(); - msr_sum_record(); + if (!no_msr) + msr_sum_record(); /* dump counters and exit */ if (dump_only) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 908e0d083936..61c69297e797 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -986,10 +986,12 @@ static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range, { dpa_perf->qos_class = FAKE_QTG_ID; dpa_perf->dpa_range = *range; - dpa_perf->coord.read_latency = 500; - dpa_perf->coord.write_latency = 500; - dpa_perf->coord.read_bandwidth = 1000; - dpa_perf->coord.write_bandwidth = 1000; + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + dpa_perf->coord[i].read_latency = 500; + dpa_perf->coord[i].write_latency = 500; + dpa_perf->coord[i].read_bandwidth = 1000; + dpa_perf->coord[i].write_bandwidth = 1000; + } } static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index aa5ec149f96c..b3b00269a52a 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -28,6 +28,8 @@ CONFIG_MCTP_FLOWS=y CONFIG_INET=y CONFIG_MPTCP=y +CONFIG_NETDEVICES=y +CONFIG_WLAN=y CONFIG_CFG80211=y CONFIG_MAC80211=y CONFIG_WLAN_VENDOR_INTEL=y @@ -38,6 +40,7 @@ CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y CONFIG_DEBUG_FS=y CONFIG_DAMON_DBGFS=y +CONFIG_DAMON_DBGFS_DEPRECATED=y CONFIG_REGMAP_BUILD=y diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index bcf195c64a45..567491f3e1b5 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -32,7 +32,7 @@ */ #endif -#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM) #define __arena __attribute__((address_space(1))) #define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ #define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */ diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c index 0766702de846..d69fd2465f53 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_htab.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c @@ -3,12 +3,14 @@ #include #include #include - +#include +#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */ +#include +#define PAGE_SIZE getpagesize() +#endif #include "arena_htab_asm.skel.h" #include "arena_htab.skel.h" -#define PAGE_SIZE 4096 - #include "bpf_arena_htab.h" static void test_arena_htab_common(struct htab *htab) diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c index e61886debab1..d15867cddde0 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_list.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c @@ -3,8 +3,11 @@ #include #include #include - -#define PAGE_SIZE 4096 +#include +#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */ +#include +#define PAGE_SIZE getpagesize() +#endif #include "bpf_arena_list.h" #include "arena_list.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index 053f4d6da77a..cc184e4420f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include +#include #include #include "bloom_filter_map.skel.h" @@ -21,6 +22,11 @@ static void test_fail_cases(void) if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0")) close(fd); + /* Invalid value size: too big */ + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, INT32_MAX, 100, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value too large")) + close(fd); + /* Invalid max entries size */ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL); if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size")) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 985273832f89..c4f9f306646e 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -5,6 +5,7 @@ #include "cap_helpers.h" #include "verifier_and.skel.h" #include "verifier_arena.skel.h" +#include "verifier_arena_large.skel.h" #include "verifier_array_access.skel.h" #include "verifier_basic_stack.skel.h" #include "verifier_bitfield_write.skel.h" @@ -120,6 +121,7 @@ static void run_tests_aux(const char *skel_name, void test_verifier_and(void) { RUN(verifier_and); } void test_verifier_arena(void) { RUN(verifier_arena); } +void test_verifier_arena_large(void) { RUN(verifier_arena_large); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); } void test_verifier_bounds(void) { RUN(verifier_bounds); } diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c index b7bb712cacfd..1e6ac187a6a0 100644 --- a/tools/testing/selftests/bpf/progs/arena_htab.c +++ b/tools/testing/selftests/bpf/progs/arena_htab.c @@ -22,7 +22,7 @@ int zero = 0; SEC("syscall") int arena_htab_llvm(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM) struct htab __arena *htab; __u64 i; diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c index cd35b8448435..c0422c58cee2 100644 --- a/tools/testing/selftests/bpf/progs/arena_list.c +++ b/tools/testing/selftests/bpf/progs/arena_list.c @@ -30,13 +30,13 @@ int list_sum; int cnt; bool skip = false; -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST long __arena arena_sum; int __arena test_val = 1; struct arena_list_head __arena global_head; #else -long arena_sum SEC(".arena.1"); -int test_val SEC(".arena.1"); +long arena_sum SEC(".addr_space.1"); +int test_val SEC(".addr_space.1"); #endif int zero; @@ -44,7 +44,7 @@ int zero; SEC("syscall") int arena_list_add(void *ctx) { -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST __u64 i; list_head = &global_head; @@ -66,7 +66,7 @@ int arena_list_add(void *ctx) SEC("syscall") int arena_list_del(void *ctx) { -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST struct elem __arena *n; int sum = 0; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 5540b05ff9ee..93144ae6df74 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -12,14 +12,18 @@ struct { __uint(type, BPF_MAP_TYPE_ARENA); __uint(map_flags, BPF_F_MMAPABLE); __uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/ - __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */ +#ifdef __TARGET_ARCH_arm64 + __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */ +#else + __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */ +#endif } arena SEC(".maps"); SEC("syscall") __success __retval(0) int basic_alloc1(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) volatile int __arena *page1, *page2, *no_page, *page3; page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); @@ -58,7 +62,7 @@ SEC("syscall") __success __retval(0) int basic_alloc2(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) volatile char __arena *page1, *page2, *page3, *page4; page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0); diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c new file mode 100644 index 000000000000..ef66ea460264 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include "bpf_misc.h" +#include "bpf_experimental.h" +#include "bpf_arena_common.h" + +#define ARENA_SIZE (1ull << 32) + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, ARENA_SIZE / PAGE_SIZE); +} arena SEC(".maps"); + +SEC("syscall") +__success __retval(0) +int big_alloc1(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page1, *page2, *no_page, *page3; + void __arena *base; + + page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page1) + return 1; + *page1 = 1; + page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, + 1, NUMA_NO_NODE, 0); + if (!page2) + return 2; + *page2 = 2; + no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE, + 1, NUMA_NO_NODE, 0); + if (no_page) + return 3; + if (*page1 != 1) + return 4; + if (*page2 != 2) + return 5; + bpf_arena_free_pages(&arena, (void __arena *)page1, 1); + if (*page2 != 2) + return 6; + if (*page1 != 0) /* use-after-free should return 0 */ + return 7; + page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page3) + return 8; + *page3 = 3; + if (page1 != page3) + return 9; + if (*page2 != 2) + return 10; + if (*(page1 + PAGE_SIZE) != 0) + return 11; + if (*(page1 - PAGE_SIZE) != 0) + return 12; + if (*(page2 + PAGE_SIZE) != 0) + return 13; + if (*(page2 - PAGE_SIZE) != 0) + return 14; +#endif + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/dmabuf-heaps/config b/tools/testing/selftests/dmabuf-heaps/config new file mode 100644 index 000000000000..be091f1cdfa0 --- /dev/null +++ b/tools/testing/selftests/dmabuf-heaps/config @@ -0,0 +1,3 @@ +CONFIG_DMABUF_HEAPS=y +CONFIG_DMABUF_HEAPS_SYSTEM=y +CONFIG_DRM_VGEM=y diff --git a/tools/testing/selftests/drivers/net/netdevsim/settings b/tools/testing/selftests/drivers/net/netdevsim/settings new file mode 100644 index 000000000000..a62d2fa1275c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/settings @@ -0,0 +1 @@ +timeout=600 diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index a0b8688b0836..fb4472ddffd8 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -19,8 +19,8 @@ include ../lib.mk $(OUTPUT)/subdir: mkdir -p $@ -$(OUTPUT)/script: - echo '#!/bin/sh' > $@ +$(OUTPUT)/script: Makefile + echo '#!/bin/bash' > $@ echo 'exit $$*' >> $@ chmod +x $@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat diff --git a/tools/testing/selftests/exec/binfmt_script.py b/tools/testing/selftests/exec/binfmt_script.py index 05f94a741c7a..2c575a2c0eab 100755 --- a/tools/testing/selftests/exec/binfmt_script.py +++ b/tools/testing/selftests/exec/binfmt_script.py @@ -16,6 +16,8 @@ SIZE=256 NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."])) test_num=0 +pass_num=0 +fail_num=0 code='''#!/usr/bin/perl print "Executed interpreter! Args:\n"; @@ -42,7 +44,7 @@ foreach my $a (@ARGV) { # ... def test(name, size, good=True, leading="", root="./", target="/perl", fill="A", arg="", newline="\n", hashbang="#!"): - global test_num, tests, NAME_MAX + global test_num, pass_num, fail_num, tests, NAME_MAX test_num += 1 if test_num > tests: raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)" @@ -80,16 +82,20 @@ def test(name, size, good=True, leading="", root="./", target="/perl", if good: print("ok %d - binfmt_script %s (successful good exec)" % (test_num, name)) + pass_num += 1 else: print("not ok %d - binfmt_script %s succeeded when it should have failed" % (test_num, name)) + fail_num = 1 else: if good: print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)" % (test_num, name, proc.returncode)) + fail_num = 1 else: print("ok %d - binfmt_script %s (correctly failed bad exec)" % (test_num, name)) + pass_num += 1 # Clean up crazy binaries os.unlink(script) @@ -166,6 +172,8 @@ test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ") test(name="two-under-leading", size=int(SIZE/2), leading=" ") test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ") +print("# Totals: pass:%d fail:%d xfail:0 xpass:0 skip:0 error:0" % (pass_num, fail_num)) + if test_num != tests: raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d" % (test_num, tests)) diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 0546ca24f2b2..6418ded40bdd 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -98,10 +98,9 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags, if (child == 0) { /* Child: do execveat(). */ rc = execveat_(fd, path, argv, envp, flags); - ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n", + ksft_print_msg("child execveat() failed, rc=%d errno=%d (%s)\n", rc, errno, strerror(errno)); - ksft_test_result_fail("%s\n", test_name); - exit(1); /* should not reach here */ + exit(errno); } /* Parent: wait for & check child's exit status. */ rc = waitpid(child, &status, 0); @@ -226,11 +225,14 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script) * "If the command name is found, but it is not an executable utility, * the exit status shall be 126."), so allow either. */ - if (is_script) + if (is_script) { + ksft_print_msg("Invoke script via root_dfd and relative filename\n"); fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0, 127, 126); - else + } else { + ksft_print_msg("Invoke exec via root_dfd and relative filename\n"); fail += check_execveat(root_dfd, longpath + 1, 0); + } return fail; } diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c index d487c2f6a615..17e3207d34ae 100644 --- a/tools/testing/selftests/exec/load_address.c +++ b/tools/testing/selftests/exec/load_address.c @@ -5,6 +5,7 @@ #include #include #include +#include "../kselftest.h" struct Statistics { unsigned long long load_address; @@ -41,28 +42,23 @@ int main(int argc, char **argv) unsigned long long misalign; int ret; - ret = dl_iterate_phdr(ExtractStatistics, &extracted); - if (ret != 1) { - fprintf(stderr, "FAILED\n"); - return 1; - } + ksft_print_header(); + ksft_set_plan(1); - if (extracted.alignment == 0) { - fprintf(stderr, "No alignment found\n"); - return 1; - } else if (extracted.alignment & (extracted.alignment - 1)) { - fprintf(stderr, "Alignment is not a power of 2\n"); - return 1; - } + ret = dl_iterate_phdr(ExtractStatistics, &extracted); + if (ret != 1) + ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n"); + + if (extracted.alignment == 0) + ksft_exit_fail_msg("FAILED: No alignment found\n"); + else if (extracted.alignment & (extracted.alignment - 1)) + ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n"); misalign = extracted.load_address & (extracted.alignment - 1); - if (misalign) { - printf("alignment = %llu, load_address = %llu\n", - extracted.alignment, extracted.load_address); - fprintf(stderr, "FAILED\n"); - return 1; - } + if (misalign) + ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n", + extracted.alignment, extracted.load_address); - fprintf(stderr, "PASS\n"); - return 0; + ksft_test_result_pass("Completed\n"); + ksft_finished(); } diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c index 2dbd5bc45b3e..b2f37d86a5f6 100644 --- a/tools/testing/selftests/exec/recursion-depth.c +++ b/tools/testing/selftests/exec/recursion-depth.c @@ -23,45 +23,44 @@ #include #include #include +#include "../kselftest.h" int main(void) { + int fd, rv; + + ksft_print_header(); + ksft_set_plan(1); + if (unshare(CLONE_NEWNS) == -1) { if (errno == ENOSYS || errno == EPERM) { - fprintf(stderr, "error: unshare, errno %d\n", errno); - return 4; + ksft_test_result_skip("error: unshare, errno %d\n", errno); + ksft_finished(); } - fprintf(stderr, "error: unshare, errno %d\n", errno); - return 1; - } - if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { - fprintf(stderr, "error: mount '/', errno %d\n", errno); - return 1; + ksft_exit_fail_msg("error: unshare, errno %d\n", errno); } + + if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1) + ksft_exit_fail_msg("error: mount '/', errno %d\n", errno); + /* Require "exec" filesystem. */ - if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) { - fprintf(stderr, "error: mount ramfs, errno %d\n", errno); - return 1; - } + if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) + ksft_exit_fail_msg("error: mount ramfs, errno %d\n", errno); #define FILENAME "/tmp/1" - int fd = creat(FILENAME, 0700); - if (fd == -1) { - fprintf(stderr, "error: creat, errno %d\n", errno); - return 1; - } + fd = creat(FILENAME, 0700); + if (fd == -1) + ksft_exit_fail_msg("error: creat, errno %d\n", errno); + #define S "#!" FILENAME "\n" - if (write(fd, S, strlen(S)) != strlen(S)) { - fprintf(stderr, "error: write, errno %d\n", errno); - return 1; - } + if (write(fd, S, strlen(S)) != strlen(S)) + ksft_exit_fail_msg("error: write, errno %d\n", errno); + close(fd); - int rv = execve(FILENAME, NULL, NULL); - if (rv == -1 && errno == ELOOP) { - return 0; - } - fprintf(stderr, "error: execve, rv %d, errno %d\n", rv, errno); - return 1; + rv = execve(FILENAME, NULL, NULL); + ksft_test_result(rv == -1 && errno == ELOOP, + "execve failed as expected (ret %d, errno %d)\n", rv, errno); + ksft_finished(); } diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index b1ede6249866..b7c8f29c09a9 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -18,7 +18,7 @@ echo 'sched:*' > set_event yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -29,7 +29,7 @@ echo 1 > events/sched/enable yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -40,7 +40,7 @@ echo 0 > events/sched/enable yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -ne 0 ]; then fail "any of scheduler events should not be recorded" fi diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 2de7c61d1ae3..3f74c09c56b6 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -24,7 +24,7 @@ echo 0 > events/enable echo "Get the most frequently calling function" sample_events -target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` +target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` if [ -z "$target_func" ]; then exit_fail fi diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config index 110d73917615..02a2a1b267c1 100644 --- a/tools/testing/selftests/iommu/config +++ b/tools/testing/selftests/iommu/config @@ -1,3 +1,5 @@ CONFIG_IOMMUFD=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION=y CONFIG_IOMMUFD_TEST=y +CONFIG_FAILSLAB=y diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 541bf192e30e..14bbab0cce13 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -51,6 +51,7 @@ #include #include #include +#include #endif #ifndef ARRAY_SIZE @@ -79,6 +80,9 @@ #define KSFT_XPASS 3 #define KSFT_SKIP 4 +#ifndef __noreturn +#define __noreturn __attribute__((__noreturn__)) +#endif #define __printf(a, b) __attribute__((format(printf, a, b))) /* counters */ @@ -288,24 +292,26 @@ void ksft_test_result_code(int exit_code, const char *test_name, } /* Docs seem to call for double space if directive is absent */ - if (!directive[0] && msg[0]) + if (!directive[0] && msg) directive = " # "; - va_start(args, msg); printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive); errno = saved_errno; - vprintf(msg, args); + if (msg) { + va_start(args, msg); + vprintf(msg, args); + va_end(args); + } printf("\n"); - va_end(args); } -static inline int ksft_exit_pass(void) +static inline __noreturn int ksft_exit_pass(void) { ksft_print_cnts(); exit(KSFT_PASS); } -static inline int ksft_exit_fail(void) +static inline __noreturn int ksft_exit_fail(void) { ksft_print_cnts(); exit(KSFT_FAIL); @@ -332,7 +338,7 @@ static inline int ksft_exit_fail(void) ksft_cnt.ksft_xfail + \ ksft_cnt.ksft_xskip) -static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...) +static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -347,19 +353,19 @@ static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...) exit(KSFT_FAIL); } -static inline int ksft_exit_xfail(void) +static inline __noreturn int ksft_exit_xfail(void) { ksft_print_cnts(); exit(KSFT_XFAIL); } -static inline int ksft_exit_xpass(void) +static inline __noreturn int ksft_exit_xpass(void) { ksft_print_cnts(); exit(KSFT_XPASS); } -static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...) +static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -388,4 +394,21 @@ static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...) exit(KSFT_SKIP); } +static inline int ksft_min_kernel_version(unsigned int min_major, + unsigned int min_minor) +{ +#ifdef NOLIBC + ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n"); + return 0; +#else + unsigned int major, minor; + struct utsname info; + + if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2) + ksft_exit_fail_msg("Can't parse kernel version\n"); + + return major > min_major || (major == min_major && minor >= min_minor); +#endif +} + #endif /* __KSELFTEST_H */ diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4fd735e48ee7..ba3ddeda24bf 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -383,6 +383,7 @@ FIXTURE_DATA(fixture_name) self; \ pid_t child = 1; \ int status = 0; \ + bool jmp = false; \ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ if (setjmp(_metadata->env) == 0) { \ /* Use the same _metadata. */ \ @@ -399,8 +400,10 @@ _metadata->exit_code = KSFT_FAIL; \ } \ } \ + else \ + jmp = true; \ if (child == 0) { \ - if (_metadata->setup_completed && !_metadata->teardown_parent) \ + if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \ fixture_name##_teardown(_metadata, &self, variant->data); \ _exit(0); \ } \ @@ -1202,7 +1205,7 @@ void __run_test(struct __fixture_metadata *f, diagnostic = "unknown"; ksft_test_result_code(t->exit_code, test_name, - diagnostic ? "%s" : "", diagnostic); + diagnostic ? "%s" : NULL, diagnostic); } static int test_harness_run(int argc, char **argv) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index ddba2c2fb5de..4eaba83cdcf3 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -135,8 +135,8 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data, irq_iter = READ_ONCE(shared_data->nr_iter); __GUEST_ASSERT(config_iter + 1 == irq_iter, - "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n" - " Guest timer interrupt was not trigged within the specified\n" + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the specified\n" " interval, try to increase the error margin by [-e] option.\n", config_iter + 1, irq_iter); } diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 3bd03b088dda..81ce37ec407d 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1037,8 +1037,19 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, uint32_t value); +void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function); + +static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature) +{ + struct kvm_cpuid_entry2 *entry; + + entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); + return *((&entry->eax) + feature.reg) & BIT(feature.bit); +} + void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_feature feature, bool set); diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 6628dc4dda89..1a6da7389bf1 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -22,10 +22,11 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { uint64_t gpa; - for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa) = gpa; - - GUEST_DONE(); + for (;;) { + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + *((volatile uint64_t *)gpa) = gpa; + GUEST_SYNC(0); + } } struct vcpu_info { @@ -55,7 +56,7 @@ static void rendezvous_with_boss(void) static void run_vcpu(struct kvm_vcpu *vcpu) { vcpu_run(vcpu); - TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); } static void *vcpu_worker(void *data) @@ -64,17 +65,13 @@ static void *vcpu_worker(void *data) struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; struct kvm_sregs sregs; - struct kvm_regs regs; vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); - /* Snapshot regs before the first run. */ - vcpu_regs_get(vcpu, ®s); rendezvous_with_boss(); run_vcpu(vcpu); rendezvous_with_boss(); - vcpu_regs_set(vcpu, ®s); vcpu_sregs_get(vcpu, &sregs); #ifdef __x86_64__ /* Toggle CR0.WP to trigger a MMU context reset. */ diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index e22848f747c0..0f9cabd99fd4 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -60,7 +60,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data) irq_iter = READ_ONCE(shared_data->nr_iter); __GUEST_ASSERT(config_iter + 1 == irq_iter, "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" - " Guest timer interrupt was not trigged within the specified\n" + " Guest timer interrupt was not triggered within the specified\n" " interval, try to increase the error margin by [-e] option.\n", config_iter + 1, irq_iter); } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 06b43ed23580..bd57d991e27d 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -333,7 +333,7 @@ static void test_invalid_memory_region_flags(void) struct kvm_vm *vm; int r, i; -#if defined __aarch64__ || defined __x86_64__ +#if defined __aarch64__ || defined __riscv || defined __x86_64__ supported_flags |= KVM_MEM_READONLY; #endif diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 9e2879af7c20..40cc59f4e650 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -133,6 +133,43 @@ static void enter_guest(struct kvm_vcpu *vcpu) } } +static void test_pv_unhalt(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_cpuid_entry2 *ent; + u32 kvm_sig_old; + + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + + TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); + + /* KVM_PV_UNHALT test */ + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); + + /* Make sure KVM clears vcpu->arch.kvm_cpuid */ + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); + kvm_sig_old = ent->ebx; + ent->ebx = 0xdeadbeef; + vcpu_set_cpuid(vcpu); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); + ent->ebx = kvm_sig_old; + vcpu_set_cpuid(vcpu); + + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); + + /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ + + kvm_vm_free(vm); +} + int main(void) { struct kvm_vcpu *vcpu; @@ -151,4 +188,6 @@ int main(void) enter_guest(vcpu); kvm_vm_free(vm); + + test_pv_unhalt(); } diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c index 29609b52f8fa..26c85815f7e9 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c @@ -416,12 +416,30 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters static void guest_test_gp_counters(void) { + uint8_t pmu_version = guest_get_pmu_version(); uint8_t nr_gp_counters = 0; uint32_t base_msr; - if (guest_get_pmu_version()) + if (pmu_version) nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + /* + * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is + * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number + * of GP counters. If there are no GP counters, require KVM to leave + * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but + * follow the spirit of the architecture and only globally enable GP + * counters, of which there are none. + */ + if (pmu_version > 1) { + uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); + + if (nr_gp_counters) + GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0)); + else + GUEST_ASSERT_EQ(global_ctrl, 0); + } + if (this_cpu_has(X86_FEATURE_PDCM) && rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) base_msr = MSR_IA32_PMC0; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 7f6f5f23fb9b..977948fd52e6 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -28,16 +28,16 @@ #define NESTED_TEST_MEM1 0xc0001000 #define NESTED_TEST_MEM2 0xc0002000 -static void l2_guest_code(void) +static void l2_guest_code(u64 *a, u64 *b) { - *(volatile uint64_t *)NESTED_TEST_MEM1; - *(volatile uint64_t *)NESTED_TEST_MEM1 = 1; + READ_ONCE(*a); + WRITE_ONCE(*a, 1); GUEST_SYNC(true); GUEST_SYNC(false); - *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + WRITE_ONCE(*b, 1); GUEST_SYNC(true); - *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + WRITE_ONCE(*b, 1); GUEST_SYNC(true); GUEST_SYNC(false); @@ -45,17 +45,33 @@ static void l2_guest_code(void) vmcall(); } +static void l2_guest_code_ept_enabled(void) +{ + l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2); +} + +static void l2_guest_code_ept_disabled(void) +{ + /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */ + l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM); +} + void l1_guest_code(struct vmx_pages *vmx) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + void *l2_rip; GUEST_ASSERT(vmx->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx)); GUEST_ASSERT(load_vmcs(vmx)); - prepare_vmcs(vmx, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); + if (vmx->eptp_gpa) + l2_rip = l2_guest_code_ept_enabled; + else + l2_rip = l2_guest_code_ept_disabled; + + prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); GUEST_SYNC(false); GUEST_ASSERT(!vmlaunch()); @@ -64,7 +80,7 @@ void l1_guest_code(struct vmx_pages *vmx) GUEST_DONE(); } -int main(int argc, char *argv[]) +static void test_vmx_dirty_log(bool enable_ept) { vm_vaddr_t vmx_pages_gva = 0; struct vmx_pages *vmx; @@ -76,8 +92,7 @@ int main(int argc, char *argv[]) struct ucall uc; bool done = false; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - TEST_REQUIRE(kvm_cpu_has_ept()); + pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled"); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); @@ -103,11 +118,16 @@ int main(int argc, char *argv[]) * * Note that prepare_eptp should be called only L1's GPA map is done, * meaning after the last call to virt_map. + * + * When EPT is disabled, the L2 guest code will still access the same L1 + * GPAs as the EPT enabled case. */ - prepare_eptp(vmx, vm, 0); - nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); + if (enable_ept) { + prepare_eptp(vmx, vm, 0); + nested_map_memslot(vmx, vm, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); + } bmap = bitmap_zalloc(TEST_MEM_PAGES); host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); @@ -148,3 +168,15 @@ int main(int argc, char *argv[]) } } } + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + test_vmx_dirty_log(/*enable_ept=*/false); + + if (kvm_cpu_has_ept()) + test_vmx_dirty_log(/*enable_ept=*/true); + + return 0; +} diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c index cbe99594d319..18a49c70d4c6 100644 --- a/tools/testing/selftests/mm/gup_test.c +++ b/tools/testing/selftests/mm/gup_test.c @@ -203,7 +203,7 @@ int main(int argc, char **argv) ksft_print_header(); ksft_set_plan(nthreads); - filed = open(file, O_RDWR|O_CREAT); + filed = open(file, O_RDWR|O_CREAT, 0664); if (filed < 0) ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno)); diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index f822ae31af22..374a308174d2 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -1745,9 +1745,12 @@ void pkey_setup_shadow(void) shadow_pkey_reg = __read_pkey_reg(); } +pid_t parent_pid; + void restore_settings_atexit(void) { - cat_into_file(buf, "/proc/sys/vm/nr_hugepages"); + if (parent_pid == getpid()) + cat_into_file(buf, "/proc/sys/vm/nr_hugepages"); } void save_settings(void) @@ -1773,6 +1776,7 @@ void save_settings(void) exit(__LINE__); } + parent_pid = getpid(); atexit(restore_settings_atexit); close(fd); } diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index cc5f144430d4..7dbfa53d93a0 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -137,7 +137,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon) if (!map) ksft_exit_fail_msg("anon mmap failed\n"); } else { - test_fd = open(fname, O_RDWR | O_CREAT); + test_fd = open(fname, O_RDWR | O_CREAT, 0664); if (test_fd < 0) { ksft_test_result_skip("Test %s open() file failed\n", __func__); return; diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 856662d2f87a..6c988bd2f335 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -223,7 +223,7 @@ void split_file_backed_thp(void) ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); } - fd = open(testfile, O_CREAT|O_WRONLY); + fd = open(testfile, O_CREAT|O_WRONLY, 0664); if (fd == -1) { ksft_perror("Cannot open testing file"); goto cleanup; diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index b0ac0ec2356d..7ad6ba660c7d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -18,6 +18,7 @@ bool test_uffdio_wp = true; unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; +atomic_bool ready_for_fork; static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -518,6 +519,8 @@ void *uffd_poll_thread(void *arg) pollfd[1].fd = pipefd[cpu*2]; pollfd[1].events = POLLIN; + ready_for_fork = true; + for (;;) { ret = poll(pollfd, 2, -1); if (ret <= 0) { diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index cb055282c89c..cc5629c3d2aa 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -32,6 +32,7 @@ #include #include #include +#include #include "../kselftest.h" #include "vm_util.h" @@ -103,6 +104,7 @@ extern bool map_shared; extern bool test_uffdio_wp; extern unsigned long long *count_verify; extern volatile bool test_uffdio_copy_eexist; +extern atomic_bool ready_for_fork; extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 2b9f8cc52639..21ec23206ab4 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -775,6 +775,8 @@ static void uffd_sigbus_test_common(bool wp) char c; struct uffd_args args = { 0 }; + ready_for_fork = false; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, @@ -790,6 +792,9 @@ static void uffd_sigbus_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ + pid = fork(); if (pid < 0) err("fork"); @@ -829,6 +834,8 @@ static void uffd_events_test_common(bool wp) char c; struct uffd_args args = { 0 }; + ready_for_fork = false; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, true, wp, false)) @@ -838,6 +845,9 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ + pid = fork(); if (pid < 0) err("fork"); @@ -1427,7 +1437,8 @@ uffd_test_case_t uffd_tests[] = { .uffd_fn = uffd_sigbus_wp_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_SIGBUS | - UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP, + UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP | + UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, { .name = "events", diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c02990bbd56f..9007c420d52c 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -3,7 +3,7 @@ #include #include #include -#include /* ffsl() */ +#include /* ffsl() */ #include /* _SC_PAGESIZE */ #define BIT_ULL(nr) (1ULL << (nr)) diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index a2662348cdb1..b7b54d646b93 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -6,7 +6,9 @@ #include "../kselftest_harness.h" -struct in6_addr in6addr_v4mapped_any = { +static const __u32 in4addr_any = INADDR_ANY; +static const __u32 in4addr_loopback = INADDR_LOOPBACK; +static const struct in6_addr in6addr_v4mapped_any = { .s6_addr = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -14,8 +16,7 @@ struct in6_addr in6addr_v4mapped_any = { 0, 0, 0, 0 } }; - -struct in6_addr in6addr_v4mapped_loopback = { +static const struct in6_addr in6addr_v4mapped_loopback = { .s6_addr = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -24,137 +25,785 @@ struct in6_addr in6addr_v4mapped_loopback = { } }; +#define NR_SOCKETS 8 + FIXTURE(bind_wildcard) { - struct sockaddr_in addr4; - struct sockaddr_in6 addr6; + int fd[NR_SOCKETS]; + socklen_t addrlen[NR_SOCKETS]; + union { + struct sockaddr addr; + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + } addr[NR_SOCKETS]; }; FIXTURE_VARIANT(bind_wildcard) { - const __u32 addr4_const; - const struct in6_addr *addr6_const; - int expected_errno; + sa_family_t family[2]; + const void *addr[2]; + bool ipv6_only[2]; + + /* 6 bind() calls below follow two bind() for the defined 2 addresses: + * + * 0.0.0.0 + * 127.0.0.1 + * :: + * ::1 + * ::ffff:0.0.0.0 + * ::ffff:127.0.0.1 + */ + int expected_errno[NR_SOCKETS]; + int expected_reuse_errno[NR_SOCKETS]; }; +/* (IPv4, IPv4) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local) +{ + .family = {AF_INET, AF_INET}, + .addr = {&in4addr_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any) +{ + .family = {AF_INET, AF_INET}, + .addr = {&in4addr_loopback, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +/* (IPv4, IPv6) */ FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only) +{ + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_loopback, - .expected_errno = 0, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_v4mapped_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_v4mapped_loopback, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only) +{ + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_loopback, - .expected_errno = 0, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_v4mapped_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_v4mapped_loopback, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; +/* (IPv6, IPv4) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_loopback, &in4addr_any}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_loopback, &in4addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_any, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_loopback, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +/* (IPv6, IPv6) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {true, true}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i, + int family, const void *addr_const) +{ + if (family == AF_INET) { + struct sockaddr_in *addr4 = &self->addr[i].addr4; + const __u32 *addr4_const = addr_const; + + addr4->sin_family = AF_INET; + addr4->sin_port = htons(0); + addr4->sin_addr.s_addr = htonl(*addr4_const); + + self->addrlen[i] = sizeof(struct sockaddr_in); + } else { + struct sockaddr_in6 *addr6 = &self->addr[i].addr6; + const struct in6_addr *addr6_const = addr_const; + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(0); + addr6->sin6_addr = *addr6_const; + + self->addrlen[i] = sizeof(struct sockaddr_in6); + } +} + FIXTURE_SETUP(bind_wildcard) { - self->addr4.sin_family = AF_INET; - self->addr4.sin_port = htons(0); - self->addr4.sin_addr.s_addr = htonl(variant->addr4_const); + setup_addr(self, 0, variant->family[0], variant->addr[0]); + setup_addr(self, 1, variant->family[1], variant->addr[1]); - self->addr6.sin6_family = AF_INET6; - self->addr6.sin6_port = htons(0); - self->addr6.sin6_addr = *variant->addr6_const; + setup_addr(self, 2, AF_INET, &in4addr_any); + setup_addr(self, 3, AF_INET, &in4addr_loopback); + + setup_addr(self, 4, AF_INET6, &in6addr_any); + setup_addr(self, 5, AF_INET6, &in6addr_loopback); + setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any); + setup_addr(self, 7, AF_INET6, &in6addr_v4mapped_loopback); } FIXTURE_TEARDOWN(bind_wildcard) { + int i; + + for (i = 0; i < NR_SOCKETS; i++) + close(self->fd[i]); } -void bind_sockets(struct __test_metadata *_metadata, - FIXTURE_DATA(bind_wildcard) *self, - int expected_errno, - struct sockaddr *addr1, socklen_t addrlen1, - struct sockaddr *addr2, socklen_t addrlen2) +void bind_socket(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_wildcard) *self, + const FIXTURE_VARIANT(bind_wildcard) *variant, + int i, int reuse) { - int fd[2]; int ret; - fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0); - ASSERT_GT(fd[0], 0); + self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0); + ASSERT_GT(self->fd[i], 0); - ret = bind(fd[0], addr1, addrlen1); - ASSERT_EQ(ret, 0); - - ret = getsockname(fd[0], addr1, &addrlen1); - ASSERT_EQ(ret, 0); - - ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port; - - fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0); - ASSERT_GT(fd[1], 0); - - ret = bind(fd[1], addr2, addrlen2); - if (expected_errno) { - ASSERT_EQ(ret, -1); - ASSERT_EQ(errno, expected_errno); - } else { + if (i < 2 && variant->ipv6_only[i]) { + ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int)); ASSERT_EQ(ret, 0); } - close(fd[1]); - close(fd[0]); + if (i < 2 && reuse) { + ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + } + + self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port; + + ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]); + + if (reuse) { + if (variant->expected_reuse_errno[i]) { + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, variant->expected_reuse_errno[i]); + } else { + ASSERT_EQ(ret, 0); + } + } else { + if (variant->expected_errno[i]) { + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, variant->expected_errno[i]); + } else { + ASSERT_EQ(ret, 0); + } + } + + if (i == 0) { + ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]); + ASSERT_EQ(ret, 0); + } } -TEST_F(bind_wildcard, v4_v6) +TEST_F(bind_wildcard, plain) { - bind_sockets(_metadata, self, variant->expected_errno, - (struct sockaddr *)&self->addr4, sizeof(self->addr4), - (struct sockaddr *)&self->addr6, sizeof(self->addr6)); + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, 0); } -TEST_F(bind_wildcard, v6_v4) +TEST_F(bind_wildcard, reuseaddr) { - bind_sockets(_metadata, self, variant->expected_errno, - (struct sockaddr *)&self->addr6, sizeof(self->addr6), - (struct sockaddr *)&self->addr4, sizeof(self->addr4)); + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, SO_REUSEADDR); +} + +TEST_F(bind_wildcard, reuseport) +{ + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, SO_REUSEPORT); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 4c4248554826..4131f3263a48 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -383,12 +383,14 @@ do_transfer() local stat_cookierx_last local stat_csum_err_s local stat_csum_err_c + local stat_tcpfb_last_l stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -457,11 +459,13 @@ do_transfer() local stat_cookietx_now local stat_cookierx_now local stat_ooo_now + local stat_tcpfb_now_l stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -508,6 +512,11 @@ do_transfer() fi fi + if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + mptcp_lib_pr_fail "unexpected fallback to TCP" + rets=1 + fi + if [ $cookies -eq 2 ];then if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then extra+=" WARN: CookieSent: did not advance" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 5e9211e89825..e4403236f655 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -729,7 +729,7 @@ pm_nl_check_endpoint() [ -n "$_flags" ]; flags="flags $_flags" shift elif [ $1 = "dev" ]; then - [ -n "$2" ]; dev="dev $1" + [ -n "$2" ]; dev="dev $2" shift elif [ $1 = "id" ]; then _id=$2 @@ -3610,6 +3610,8 @@ endpoint_tests() local tests_pid=$! wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 chk_subflow_nr "before delete" 2 chk_mptcp_info subflows 1 subflows 1 diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c index 7c5b12664b03..bfb07dc49518 100644 --- a/tools/testing/selftests/net/reuseaddr_conflict.c +++ b/tools/testing/selftests/net/reuseaddr_conflict.c @@ -109,6 +109,6 @@ int main(void) fd1 = open_port(0, 1); if (fd1 >= 0) error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); - fprintf(stderr, "Success"); + fprintf(stderr, "Success\n"); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c index 2fb6dd8adba6..8b984fa04286 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/proc.c +++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c @@ -86,7 +86,7 @@ static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line) pos = strchr(line, ' ') + 1; - if (fscanf(fnetstat, type->header_name) == EOF) + if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF) test_error("fscanf(%s)", type->header_name); if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':') test_error("Unexpected netstat format (%c)", tmp); diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c index 92276f916f2f..e408b9243b2c 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/setup.c +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -17,37 +17,37 @@ static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER; void __test_msg(const char *buf) { pthread_mutex_lock(&ksft_print_lock); - ksft_print_msg(buf); + ksft_print_msg("%s", buf); pthread_mutex_unlock(&ksft_print_lock); } void __test_ok(const char *buf) { pthread_mutex_lock(&ksft_print_lock); - ksft_test_result_pass(buf); + ksft_test_result_pass("%s", buf); pthread_mutex_unlock(&ksft_print_lock); } void __test_fail(const char *buf) { pthread_mutex_lock(&ksft_print_lock); - ksft_test_result_fail(buf); + ksft_test_result_fail("%s", buf); pthread_mutex_unlock(&ksft_print_lock); } void __test_xfail(const char *buf) { pthread_mutex_lock(&ksft_print_lock); - ksft_test_result_xfail(buf); + ksft_test_result_xfail("%s", buf); pthread_mutex_unlock(&ksft_print_lock); } void __test_error(const char *buf) { pthread_mutex_lock(&ksft_print_lock); - ksft_test_result_error(buf); + ksft_test_result_error("%s", buf); pthread_mutex_unlock(&ksft_print_lock); } void __test_skip(const char *buf) { pthread_mutex_lock(&ksft_print_lock); - ksft_test_result_skip(buf); + ksft_test_result_skip("%s", buf); pthread_mutex_unlock(&ksft_print_lock); } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index 7df8b8700e39..a2fe88d35ac0 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -256,8 +256,6 @@ static int test_wait_fds(int sk[], size_t nr, bool is_writable[], static void test_client_active_rst(unsigned int port) { - /* one in queue, another accept()ed */ - unsigned int wait_for = backlog + 2; int i, sk[3], err; bool is_writable[ARRAY_SIZE(sk)] = {false}; unsigned int last = ARRAY_SIZE(sk) - 1; @@ -275,16 +273,20 @@ static void test_client_active_rst(unsigned int port) for (i = 0; i < last; i++) { err = _test_connect_socket(sk[i], this_ip_dest, port, (i == 0) ? TEST_TIMEOUT_SEC : -1); - if (err < 0) test_error("failed to connect()"); } - synchronize_threads(); /* 2: connection accept()ed, another queued */ - err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC); + synchronize_threads(); /* 2: two connections: one accept()ed, another queued */ + err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC); if (err < 0) test_error("test_wait_fds(): %d", err); + /* async connect() with third sk to get into request_sock_queue */ + err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + if (err < 0) + test_error("failed to connect()"); + synchronize_threads(); /* 3: close listen socket */ if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); @@ -292,13 +294,14 @@ static void test_client_active_rst(unsigned int port) test_ok("Verified established tcp connection"); synchronize_threads(); /* 4: finishing up */ - err = _test_connect_socket(sk[last], this_ip_dest, port, -1); - if (err < 0) - test_error("failed to connect()"); synchronize_threads(); /* 5: closed active sk */ - err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL, - wait_for, TEST_TIMEOUT_SEC); + /* + * Wait for 2 connections: one accepted, another in the accept queue, + * the one in request_sock_queue won't get fully established, so + * doesn't receive an active RST, see inet_csk_listen_stop(). + */ + err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC); if (err < 0) test_error("select(): %d", err); diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 452de131fa3a..517930f9721b 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -21,7 +21,7 @@ static void make_listen(int sk) static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, const char *tst) { - struct tcp_ao_info_opt tmp; + struct tcp_ao_info_opt tmp = {}; socklen_t len = sizeof(tmp); if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len)) diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 74ff9fb2a6f0..58da5de99ac4 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -1177,6 +1177,7 @@ encap_params_common() local plen=$1; shift local enc_ethtype=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1195,11 +1196,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Destination IP - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Destination IP - no match" @@ -1212,20 +1213,20 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Default destination port - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Default destination port - no match" run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Non-default destination port - match" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Non-default destination port - no match" @@ -1238,11 +1239,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Default destination VNI - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Default destination VNI - no match" @@ -1250,11 +1251,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Non-default destination VNI - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Non-default destination VNI - no match" @@ -1272,6 +1273,7 @@ encap_params_ipv4_ipv4() local plen=32 local enc_ethtype="ip" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1279,7 +1281,7 @@ encap_params_ipv4_ipv4() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn" + $grp $grp_dmac $src "mausezahn" } encap_params_ipv6_ipv4() @@ -1291,6 +1293,7 @@ encap_params_ipv6_ipv4() local plen=32 local enc_ethtype="ip" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1298,7 +1301,7 @@ encap_params_ipv6_ipv4() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn -6" + $grp $grp_dmac $src "mausezahn -6" } encap_params_ipv4_ipv6() @@ -1310,6 +1313,7 @@ encap_params_ipv4_ipv6() local plen=128 local enc_ethtype="ipv6" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1317,7 +1321,7 @@ encap_params_ipv4_ipv6() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn" + $grp $grp_dmac $src "mausezahn" } encap_params_ipv6_ipv6() @@ -1329,6 +1333,7 @@ encap_params_ipv6_ipv6() local plen=128 local enc_ethtype="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1336,7 +1341,7 @@ encap_params_ipv6_ipv6() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn -6" + $grp $grp_dmac $src "mausezahn -6" } starg_exclude_ir_common() @@ -1347,6 +1352,7 @@ starg_exclude_ir_common() local vtep2_ip=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1368,14 +1374,14 @@ starg_exclude_ir_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 log_test $? 0 "Block excluded source - second VTEP" # Check that valid source is forwarded to both VTEPs. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1385,14 +1391,14 @@ starg_exclude_ir_common() run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Block excluded source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 log_test $? 0 "Block excluded source after removal - second VTEP" # Check that valid source is forwarded to the remaining VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Forward valid source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1407,6 +1413,7 @@ starg_exclude_ir_ipv4_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1415,7 +1422,7 @@ starg_exclude_ir_ipv4_ipv4() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_exclude_ir_ipv6_ipv4() @@ -1426,6 +1433,7 @@ starg_exclude_ir_ipv6_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1434,7 +1442,7 @@ starg_exclude_ir_ipv6_ipv4() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_exclude_ir_ipv4_ipv6() @@ -1445,6 +1453,7 @@ starg_exclude_ir_ipv4_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1453,7 +1462,7 @@ starg_exclude_ir_ipv4_ipv6() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_exclude_ir_ipv6_ipv6() @@ -1464,6 +1473,7 @@ starg_exclude_ir_ipv6_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1472,7 +1482,7 @@ starg_exclude_ir_ipv6_ipv6() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_include_ir_common() @@ -1483,6 +1493,7 @@ starg_include_ir_common() local vtep2_ip=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1504,14 +1515,14 @@ starg_include_ir_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 log_test $? 0 "Block excluded source - second VTEP" # Check that valid source is forwarded to both VTEPs. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1521,14 +1532,14 @@ starg_include_ir_common() run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Block excluded source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 log_test $? 0 "Block excluded source after removal - second VTEP" # Check that valid source is forwarded to the remaining VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Forward valid source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1543,6 +1554,7 @@ starg_include_ir_ipv4_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1551,7 +1563,7 @@ starg_include_ir_ipv4_ipv4() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_include_ir_ipv6_ipv4() @@ -1562,6 +1574,7 @@ starg_include_ir_ipv6_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1570,7 +1583,7 @@ starg_include_ir_ipv6_ipv4() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_include_ir_ipv4_ipv6() @@ -1581,6 +1594,7 @@ starg_include_ir_ipv4_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1589,7 +1603,7 @@ starg_include_ir_ipv4_ipv6() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_include_ir_ipv6_ipv6() @@ -1600,6 +1614,7 @@ starg_include_ir_ipv6_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1608,7 +1623,7 @@ starg_include_ir_ipv6_ipv6() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_exclude_p2mp_common() @@ -1618,6 +1633,7 @@ starg_exclude_p2mp_common() local mcast_grp=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1635,12 +1651,12 @@ starg_exclude_p2mp_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" # Check that invalid source is not forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source" # Check that valid source is forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source" @@ -1648,7 +1664,7 @@ starg_exclude_p2mp_common() run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" # Check that valid source is not received anymore. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Receive of valid source after removal from group" } @@ -1660,6 +1676,7 @@ starg_exclude_p2mp_ipv4_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1667,7 +1684,7 @@ starg_exclude_p2mp_ipv4_ipv4() echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1678,6 +1695,7 @@ starg_exclude_p2mp_ipv6_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1685,7 +1703,7 @@ starg_exclude_p2mp_ipv6_ipv4() echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1696,6 +1714,7 @@ starg_exclude_p2mp_ipv4_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1703,7 +1722,7 @@ starg_exclude_p2mp_ipv4_ipv6() echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1714,6 +1733,7 @@ starg_exclude_p2mp_ipv6_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1721,7 +1741,7 @@ starg_exclude_p2mp_ipv6_ipv6() echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1732,6 +1752,7 @@ starg_include_p2mp_common() local mcast_grp=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1749,12 +1770,12 @@ starg_include_p2mp_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" # Check that invalid source is not forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source" # Check that valid source is forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source" @@ -1762,7 +1783,7 @@ starg_include_p2mp_common() run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" # Check that valid source is not received anymore. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Receive of valid source after removal from group" } @@ -1774,6 +1795,7 @@ starg_include_p2mp_ipv4_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1781,7 +1803,7 @@ starg_include_p2mp_ipv4_ipv4() echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1792,6 +1814,7 @@ starg_include_p2mp_ipv6_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1799,7 +1822,7 @@ starg_include_p2mp_ipv6_ipv4() echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1810,6 +1833,7 @@ starg_include_p2mp_ipv4_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1817,7 +1841,7 @@ starg_include_p2mp_ipv4_ipv6() echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1828,6 +1852,7 @@ starg_include_p2mp_ipv6_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1835,7 +1860,7 @@ starg_include_p2mp_ipv6_ipv6() echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1847,6 +1872,7 @@ egress_vni_translation_common() local plen=$1; shift local proto=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1882,20 +1908,20 @@ egress_vni_translation_common() # Make sure that packets sent from the first VTEP over VLAN 10 are # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on # the second VTEP, since it is configured as PVID. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 log_test $? 0 "Egress VNI translation - PVID configured" # Remove PVID flag from VLAN 4000 on the second VTEP and make sure # packets are no longer received by the SVI interface. run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 log_test $? 0 "Egress VNI translation - no PVID configured" # Reconfigure the PVID and make sure packets are received again. run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 log_test $? 0 "Egress VNI translation - PVID reconfigured" } @@ -1908,6 +1934,7 @@ egress_vni_translation_ipv4_ipv4() local plen=32 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1915,7 +1942,7 @@ egress_vni_translation_ipv4_ipv4() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn" + $grp_dmac $src "mausezahn" } egress_vni_translation_ipv6_ipv4() @@ -1926,6 +1953,7 @@ egress_vni_translation_ipv6_ipv4() local plen=32 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1933,7 +1961,7 @@ egress_vni_translation_ipv6_ipv4() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn -6" + $grp_dmac $src "mausezahn -6" } egress_vni_translation_ipv4_ipv6() @@ -1944,6 +1972,7 @@ egress_vni_translation_ipv4_ipv6() local plen=128 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1951,7 +1980,7 @@ egress_vni_translation_ipv4_ipv6() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn" + $grp_dmac $src "mausezahn" } egress_vni_translation_ipv6_ipv6() @@ -1962,6 +1991,7 @@ egress_vni_translation_ipv6_ipv6() local plen=128 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1969,7 +1999,7 @@ egress_vni_translation_ipv6_ipv6() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn -6" + $grp_dmac $src "mausezahn -6" } all_zeros_mdb_common() @@ -1982,12 +2012,18 @@ all_zeros_mdb_common() local vtep4_ip=$1; shift local plen=$1; shift local ipv4_grp=239.1.1.1 + local ipv4_grp_dmac=01:00:5e:01:01:01 local ipv4_unreg_grp=239.2.2.2 + local ipv4_unreg_grp_dmac=01:00:5e:02:02:02 local ipv4_ll_grp=224.0.0.100 + local ipv4_ll_grp_dmac=01:00:5e:00:00:64 local ipv4_src=192.0.2.129 local ipv6_grp=ff0e::1 + local ipv6_grp_dmac=33:33:00:00:00:01 local ipv6_unreg_grp=ff0e::2 + local ipv6_unreg_grp_dmac=33:33:00:00:00:02 local ipv6_ll_grp=ff02::1 + local ipv6_ll_grp_dmac=33:33:00:00:00:01 local ipv6_src=2001:db8:100::1 # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic @@ -2023,7 +2059,7 @@ all_zeros_mdb_common() # Send registered IPv4 multicast and make sure it only arrives to the # first VTEP. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Registered IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 @@ -2031,7 +2067,7 @@ all_zeros_mdb_common() # Send unregistered IPv4 multicast that is not link-local and make sure # it arrives to the first and second VTEPs. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Unregistered IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -2039,7 +2075,7 @@ all_zeros_mdb_common() # Send IPv4 link-local multicast traffic and make sure it does not # arrive to any VTEP. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Link-local IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -2074,7 +2110,7 @@ all_zeros_mdb_common() # Send registered IPv6 multicast and make sure it only arrives to the # third VTEP. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 1 log_test $? 0 "Registered IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 0 @@ -2082,7 +2118,7 @@ all_zeros_mdb_common() # Send unregistered IPv6 multicast that is not link-local and make sure # it arrives to the third and fourth VTEPs. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 2 log_test $? 0 "Unregistered IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 1 @@ -2090,7 +2126,7 @@ all_zeros_mdb_common() # Send IPv6 link-local multicast traffic and make sure it does not # arrive to any VTEP. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 2 log_test $? 0 "Link-local IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 1 @@ -2165,6 +2201,7 @@ mdb_fdb_common() local plen=$1; shift local proto=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -2188,7 +2225,7 @@ mdb_fdb_common() # Send IP multicast traffic and make sure it is forwarded by the MDB # and only arrives to the first VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "IP multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 @@ -2205,7 +2242,7 @@ mdb_fdb_common() # Remove the MDB entry and make sure that IP multicast is now forwarded # by the FDB to the second VTEP. run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "IP multicast after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 2 @@ -2221,14 +2258,15 @@ mdb_fdb_ipv4_ipv4() local plen=32 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn" } mdb_fdb_ipv6_ipv4() @@ -2240,14 +2278,15 @@ mdb_fdb_ipv6_ipv4() local plen=32 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn -6" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn -6" } mdb_fdb_ipv4_ipv6() @@ -2259,14 +2298,15 @@ mdb_fdb_ipv4_ipv6() local plen=128 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn" } mdb_fdb_ipv6_ipv6() @@ -2278,14 +2318,15 @@ mdb_fdb_ipv6_ipv6() local plen=128 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn -6" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn -6" } mdb_grp1_loop() @@ -2320,7 +2361,9 @@ mdb_torture_common() local vtep1_ip=$1; shift local vtep2_ip=$1; shift local grp1=$1; shift + local grp1_dmac=$1; shift local grp2=$1; shift + local grp2_dmac=$1; shift local src=$1; shift local mz=$1; shift local pid1 @@ -2345,9 +2388,9 @@ mdb_torture_common() pid1=$! mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & pid2=$! - ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & pid3=$! - ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & pid4=$! sleep 30 @@ -2363,15 +2406,17 @@ mdb_torture_ipv4_ipv4() local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=239.1.1.1 + local grp1_dmac=01:00:5e:01:01:01 local grp2=239.2.2.2 + local grp2_dmac=01:00:5e:02:02:02 local src=192.0.2.129 echo echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn" } mdb_torture_ipv6_ipv4() @@ -2380,15 +2425,17 @@ mdb_torture_ipv6_ipv4() local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=ff0e::1 + local grp1_dmac=33:33:00:00:00:01 local grp2=ff0e::2 + local grp2_dmac=33:33:00:00:00:02 local src=2001:db8:100::1 echo echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn -6" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn -6" } mdb_torture_ipv4_ipv6() @@ -2397,15 +2444,17 @@ mdb_torture_ipv4_ipv6() local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=239.1.1.1 + local grp1_dmac=01:00:5e:01:01:01 local grp2=239.2.2.2 + local grp2_dmac=01:00:5e:02:02:02 local src=192.0.2.129 echo echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn" } mdb_torture_ipv6_ipv6() @@ -2414,15 +2463,17 @@ mdb_torture_ipv6_ipv6() local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=ff0e::1 + local grp1_dmac=33:33:00:00:00:01 local grp2=ff0e::2 + local grp2_dmac=33:33:00:00:00:02 local src=2001:db8:100::1 echo echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn -6" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn -6" } ################################################################################ diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index c6eda21cefb6..f27a12d2a2c9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1615,6 +1615,40 @@ TEST_F(tls, getsockopt) EXPECT_EQ(errno, EINVAL); } +TEST_F(tls, recv_efault) +{ + char *rec1 = "1111111111"; + char *rec2 = "2222222222"; + struct msghdr hdr = {}; + struct iovec iov[2]; + char recv_mem[12]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, 10, 0), 10); + EXPECT_EQ(send(self->fd, rec2, 10, 0), 10); + + iov[0].iov_base = recv_mem; + iov[0].iov_len = sizeof(recv_mem); + iov[1].iov_base = NULL; /* broken iov to make process_rx_list fail */ + iov[1].iov_len = 1; + + hdr.msg_iovlen = 2; + hdr.msg_iov = iov; + + EXPECT_EQ(recv(self->cfd, recv_mem, 1, 0), 1); + EXPECT_EQ(recv_mem[0], rec1[0]); + + ret = recvmsg(self->cfd, &hdr, 0); + EXPECT_LE(ret, sizeof(recv_mem)); + EXPECT_GE(ret, 9); + EXPECT_EQ(memcmp(rec1, recv_mem, 9), 0); + if (ret > 9) + EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); +} + FIXTURE(tls_err) { int fd, cfd; diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 380cb15e942e..83ed987cff34 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -244,7 +244,7 @@ for family in 4 6; do create_vxlan_pair ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on - run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10 cleanup # use NAT to circumvent GRO FWD check @@ -258,13 +258,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null - run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST - cleanup - - create_vxlan_pair - run_bench "UDP tunnel fwd perf" $OL_NET$DST - ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on - run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup done diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 1d975bf52af3..85b3baa3f7f3 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -34,7 +34,7 @@ #endif #ifndef UDP_MAX_SEGMENTS -#define UDP_MAX_SEGMENTS (1 << 6UL) +#define UDP_MAX_SEGMENTS (1 << 7UL) #endif #define CONST_MTU_TEST 1500 diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c index 505294da1b9f..d6f99eb9be65 100644 --- a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c +++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c @@ -154,7 +154,7 @@ static int dev_papr_vpd_null_handle(void) static int papr_vpd_close_handle_without_reading(void) { const int devfd = open(DEVPATH, O_RDONLY); - struct papr_location_code lc; + struct papr_location_code lc = { .str = "", }; int fd; SKIP_IF_MSG(devfd < 0 && errno == ENOENT, diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings index 6091b45d226b..a953c96aa16e 100644 --- a/tools/testing/selftests/seccomp/settings +++ b/tools/testing/selftests/seccomp/settings @@ -1 +1 @@ -timeout=120 +timeout=180 diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index d49dd3ffd0d9..c001dd79179d 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end) diff = end.tv_usec - start.tv_usec; diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; - if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { printf("Diff too high: %lld..", diff); return -1; } @@ -184,80 +184,71 @@ static int check_timer_create(int which) return 0; } -int remain; -__thread int got_signal; +static pthread_t ctd_thread; +static volatile int ctd_count, ctd_failed; -static void *distribution_thread(void *arg) +static void ctd_sighandler(int sig) { - while (__atomic_load_n(&remain, __ATOMIC_RELAXED)); - return NULL; + if (pthread_self() != ctd_thread) + ctd_failed = 1; + ctd_count--; } -static void distribution_handler(int nr) +static void *ctd_thread_func(void *arg) { - if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED)) - __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED); -} - -/* - * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID - * timer signals. This primarily tests that the kernel does not favour any one. - */ -static int check_timer_distribution(void) -{ - int err, i; - timer_t id; - const int nthreads = 10; - pthread_t threads[nthreads]; struct itimerspec val = { .it_value.tv_sec = 0, .it_value.tv_nsec = 1000 * 1000, .it_interval.tv_sec = 0, .it_interval.tv_nsec = 1000 * 1000, }; + timer_t id; - remain = nthreads + 1; /* worker threads + this thread */ - signal(SIGALRM, distribution_handler); - err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id); - if (err < 0) { - ksft_perror("Can't create timer"); - return -1; - } - err = timer_settime(id, 0, &val, NULL); - if (err < 0) { - ksft_perror("Can't set timer"); - return -1; - } + /* 1/10 seconds to ensure the leader sleeps */ + usleep(10000); - for (i = 0; i < nthreads; i++) { - err = pthread_create(&threads[i], NULL, distribution_thread, - NULL); - if (err) { - ksft_print_msg("Can't create thread: %s (%d)\n", - strerror(errno), errno); - return -1; - } - } + ctd_count = 100; + if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id)) + return "Can't create timer\n"; + if (timer_settime(id, 0, &val, NULL)) + return "Can't set timer\n"; - /* Wait for all threads to receive the signal. */ - while (__atomic_load_n(&remain, __ATOMIC_RELAXED)); + while (ctd_count > 0 && !ctd_failed) + ; - for (i = 0; i < nthreads; i++) { - err = pthread_join(threads[i], NULL); - if (err) { - ksft_print_msg("Can't join thread: %s (%d)\n", - strerror(errno), errno); - return -1; - } - } + if (timer_delete(id)) + return "Can't delete timer\n"; - if (timer_delete(id)) { - ksft_perror("Can't delete timer"); - return -1; - } + return NULL; +} - ksft_test_result_pass("check_timer_distribution\n"); +/* + * Test that only the running thread receives the timer signal. + */ +static int check_timer_distribution(void) +{ + const char *errmsg; + + signal(SIGALRM, ctd_sighandler); + + errmsg = "Can't create thread\n"; + if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL)) + goto err; + + errmsg = "Can't join thread\n"; + if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg) + goto err; + + if (!ctd_failed) + ksft_test_result_pass("check signal distribution\n"); + else if (ksft_min_kernel_version(6, 3)) + ksft_test_result_fail("check signal distribution\n"); + else + ksft_test_result_skip("check signal distribution (old kernel)\n"); return 0; +err: + ksft_print_msg("%s", errmsg); + return -1; } int main(int argc, char **argv) diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index 48b9a803235a..d13ebde20322 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -21,9 +21,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ - - - #include #include #include @@ -62,45 +59,47 @@ int clear_time_state(void) #define NUM_FREQ_OUTOFRANGE 4 #define NUM_FREQ_INVALID 2 +#define SHIFTED_PPM (1 << 16) + long valid_freq[NUM_FREQ_VALID] = { - -499<<16, - -450<<16, - -400<<16, - -350<<16, - -300<<16, - -250<<16, - -200<<16, - -150<<16, - -100<<16, - -75<<16, - -50<<16, - -25<<16, - -10<<16, - -5<<16, - -1<<16, + -499 * SHIFTED_PPM, + -450 * SHIFTED_PPM, + -400 * SHIFTED_PPM, + -350 * SHIFTED_PPM, + -300 * SHIFTED_PPM, + -250 * SHIFTED_PPM, + -200 * SHIFTED_PPM, + -150 * SHIFTED_PPM, + -100 * SHIFTED_PPM, + -75 * SHIFTED_PPM, + -50 * SHIFTED_PPM, + -25 * SHIFTED_PPM, + -10 * SHIFTED_PPM, + -5 * SHIFTED_PPM, + -1 * SHIFTED_PPM, -1000, - 1<<16, - 5<<16, - 10<<16, - 25<<16, - 50<<16, - 75<<16, - 100<<16, - 150<<16, - 200<<16, - 250<<16, - 300<<16, - 350<<16, - 400<<16, - 450<<16, - 499<<16, + 1 * SHIFTED_PPM, + 5 * SHIFTED_PPM, + 10 * SHIFTED_PPM, + 25 * SHIFTED_PPM, + 50 * SHIFTED_PPM, + 75 * SHIFTED_PPM, + 100 * SHIFTED_PPM, + 150 * SHIFTED_PPM, + 200 * SHIFTED_PPM, + 250 * SHIFTED_PPM, + 300 * SHIFTED_PPM, + 350 * SHIFTED_PPM, + 400 * SHIFTED_PPM, + 450 * SHIFTED_PPM, + 499 * SHIFTED_PPM, }; long outofrange_freq[NUM_FREQ_OUTOFRANGE] = { - -1000<<16, - -550<<16, - 550<<16, - 1000<<16, + -1000 * SHIFTED_PPM, + -550 * SHIFTED_PPM, + 550 * SHIFTED_PPM, + 1000 * SHIFTED_PPM, }; #define LONG_MAX (~0UL>>1) diff --git a/tools/testing/selftests/turbostat/defcolumns.py b/tools/testing/selftests/turbostat/defcolumns.py new file mode 100755 index 000000000000..d9b042097da7 --- /dev/null +++ b/tools/testing/selftests/turbostat/defcolumns.py @@ -0,0 +1,60 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +from shutil import which + +turbostat = which('turbostat') +if turbostat is None: + print('Could not find turbostat binary') + exit(1) + +timeout = which('timeout') +if timeout is None: + print('Could not find timeout binary') + exit(1) + +proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +# +# By default --list reports also "usec" and "Time_Of_Day_Seconds" columns +# which are only visible when running with --debug. +# +expected_columns_debug = proc_turbostat.stdout.replace(b',', b'\t').strip() +expected_columns = expected_columns_debug.replace(b'usec\t', b'').replace(b'Time_Of_Day_Seconds\t', b'').replace(b'X2APIC\t', b'').replace(b'APIC\t', b'') + +# +# Run turbostat with no options for 10 seconds and send SIGINT +# +timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '1s'] +turbostat_argv = [turbostat, '-i', '0.250'] + +print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True) +proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) +actual_columns = proc_turbostat.stdout.split(b'\n')[0] +if expected_columns != actual_columns: + print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}') + exit(1) +print('OK') + +# +# Same, but with --debug +# +turbostat_argv.append('--debug') + +print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True) +proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) +actual_columns = proc_turbostat.stdout.split(b'\n')[0] +if expected_columns_debug != actual_columns: + print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}') + exit(1) +print('OK') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fb49c2a60200..ff0a20565f90 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -832,8 +832,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * mn_active_invalidate_count (see above) instead of * mmu_invalidate_in_progress. */ - gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, - hva_range.may_block); + gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end); /* * If one or more memslots were found and thus zapped, notify arch code diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h index ecefc7ec51af..715f19669d01 100644 --- a/virt/kvm/kvm_mm.h +++ b/virt/kvm/kvm_mm.h @@ -26,13 +26,11 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, #ifdef CONFIG_HAVE_KVM_PFNCACHE void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, - unsigned long end, - bool may_block); + unsigned long end); #else static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, - unsigned long end, - bool may_block) + unsigned long end) { } #endif /* HAVE_KVM_PFNCACHE */ diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 4e07112a24c2..e3453e869e92 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -23,7 +23,7 @@ * MMU notifier 'invalidate_range_start' hook. */ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, - unsigned long end, bool may_block) + unsigned long end) { struct gfn_to_pfn_cache *gpc; @@ -57,6 +57,19 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, spin_unlock(&kvm->gpc_lock); } +static bool kvm_gpc_is_valid_len(gpa_t gpa, unsigned long uhva, + unsigned long len) +{ + unsigned long offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) : + offset_in_page(gpa); + + /* + * The cached access must fit within a single page. The 'len' argument + * to activate() and refresh() exists only to enforce that. + */ + return offset + len <= PAGE_SIZE; +} + bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(gpc->kvm); @@ -74,7 +87,7 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len) if (kvm_is_error_hva(gpc->uhva)) return false; - if (offset_in_page(gpc->uhva) + len > PAGE_SIZE) + if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len)) return false; if (!gpc->valid) @@ -232,8 +245,7 @@ out_error: return -EFAULT; } -static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva, - unsigned long len) +static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva) { unsigned long page_offset; bool unmap_old = false; @@ -247,15 +259,6 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva))) return -EINVAL; - /* - * The cached acces must fit within a single page. The 'len' argument - * exists only to enforce that. - */ - page_offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) : - offset_in_page(gpa); - if (page_offset + len > PAGE_SIZE) - return -EINVAL; - lockdep_assert_held(&gpc->refresh_lock); write_lock_irq(&gpc->lock); @@ -270,6 +273,8 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l old_uhva = PAGE_ALIGN_DOWN(gpc->uhva); if (kvm_is_error_gpa(gpa)) { + page_offset = offset_in_page(uhva); + gpc->gpa = INVALID_GPA; gpc->memslot = NULL; gpc->uhva = PAGE_ALIGN_DOWN(uhva); @@ -279,6 +284,8 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l } else { struct kvm_memslots *slots = kvm_memslots(gpc->kvm); + page_offset = offset_in_page(gpa); + if (gpc->gpa != gpa || gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva)) { gfn_t gfn = gpa_to_gfn(gpa); @@ -354,6 +361,9 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len) guard(mutex)(&gpc->refresh_lock); + if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len)) + return -EINVAL; + /* * If the GPA is valid then ignore the HVA, as a cache can be GPA-based * or HVA-based, not both. For GPA-based caches, the HVA will be @@ -361,7 +371,7 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len) */ uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD; - return __kvm_gpc_refresh(gpc, gpc->gpa, uhva, len); + return __kvm_gpc_refresh(gpc, gpc->gpa, uhva); } void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm) @@ -381,6 +391,9 @@ static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned { struct kvm *kvm = gpc->kvm; + if (!kvm_gpc_is_valid_len(gpa, uhva, len)) + return -EINVAL; + guard(mutex)(&gpc->refresh_lock); if (!gpc->active) { @@ -400,11 +413,18 @@ static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned gpc->active = true; write_unlock_irq(&gpc->lock); } - return __kvm_gpc_refresh(gpc, gpa, uhva, len); + return __kvm_gpc_refresh(gpc, gpa, uhva); } int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) { + /* + * Explicitly disallow INVALID_GPA so that the magic value can be used + * by KVM to differentiate between GPA-based and HVA-based caches. + */ + if (WARN_ON_ONCE(kvm_is_error_gpa(gpa))) + return -EINVAL; + return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len); }