58ae0b5150
Empty memory nodes, in which no memory resides, are allowed. The NUMA node IDs are still valid and parsed, and memory may be added to them through hotplug afterwards. Currently, QEMU fails to boot when multiple empty memory nodes are specified. It's caused by device-tree population failure and duplicated memory node names. The device-tree specification doesn't specify how empty NUMA nodes are handled. Besides, I found it difficult to find where this case is documented. So let's add a section for empty memory nodes to cover it in the NUMA binding document. Signed-off-by: Gavin Shan <gshan@redhat.com> Link: https://lore.kernel.org/r/20210927064119.127285-2-gshan@redhat.com Signed-off-by: Rob Herring <robh@kernel.org>
320 lines
7.8 KiB
Plaintext
320 lines
7.8 KiB
Plaintext
==============================================================================
|
|
NUMA binding description.
|
|
==============================================================================
|
|
|
|
==============================================================================
|
|
1 - Introduction
|
|
==============================================================================
|
|
|
|
Systems employing a Non Uniform Memory Access (NUMA) architecture contain
|
|
collections of hardware resources including processors, memory, and I/O buses,
|
|
that comprise what is commonly known as a NUMA node.
|
|
Processor accesses to memory within the local NUMA node are generally faster
|
|
than processor accesses to memory outside of the local NUMA node.
|
|
DT defines interfaces that allow the platform to convey NUMA node
|
|
topology information to the OS.
|
|
|
|
==============================================================================
|
|
2 - numa-node-id
|
|
==============================================================================
|
|
|
|
For the purpose of identification, each NUMA node is associated with a unique
|
|
token known as a node id. For the purpose of this binding
|
|
a node id is a 32-bit integer.
|
|
|
|
A device node is associated with a NUMA node by the presence of a
|
|
numa-node-id property which contains the node id of the device.
|
|
|
|
Example:
|
|
/* numa node 0 */
|
|
numa-node-id = <0>;
|
|
|
|
/* numa node 1 */
|
|
numa-node-id = <1>;
|
|
|
|
==============================================================================
|
|
3 - distance-map
|
|
==============================================================================
|
|
|
|
The optional device tree node distance-map describes the relative
|
|
distance (memory latency) between all numa nodes.
|
|
|
|
- compatible : Should at least contain "numa-distance-map-v1".
|
|
|
|
- distance-matrix
|
|
This property defines a matrix to describe the relative distances
|
|
between all numa nodes.
|
|
It is represented as a list of node pairs and their relative distance.
|
|
|
|
Note:
|
|
1. Each entry represents distance from first node to second node.
|
|
The distances are equal in either direction.
|
|
2. The distance from a node to self (local distance) is represented
|
|
with value 10 and all internode distances should be represented with
|
|
a value greater than 10.
|
|
3. distance-matrix should have entries in lexicographical ascending
|
|
order of nodes.
|
|
4. There must be only one device node distance-map which must
|
|
reside in the root node.
|
|
5. If the distance-map node is not present, a default
|
|
distance-matrix is used.
|
|
|
|
Example:
|
|
4 nodes connected in mesh/ring topology as below,
|
|
|
|
0_______20______1
|
|
|               |
|
|
|               |
|
|
20             20
|
|
|               |
|
|
|               |
|
|
|_______________|
|
|
3       20      2
|
|
|
|
if the relative distance for each hop is 20,
|
|
then the internode distances would be,
|
|
0 -> 1 = 20
|
|
1 -> 2 = 20
|
|
2 -> 3 = 20
|
|
3 -> 0 = 20
|
|
0 -> 2 = 40
|
|
1 -> 3 = 40
|
|
|
|
and the DT representation of this distance matrix is,
|
|
|
|
distance-map {
|
|
compatible = "numa-distance-map-v1";
|
|
distance-matrix = <0 0 10>,
|
|
<0 1 20>,
|
|
<0 2 40>,
|
|
<0 3 20>,
|
|
<1 0 20>,
|
|
<1 1 10>,
|
|
<1 2 20>,
|
|
<1 3 40>,
|
|
<2 0 40>,
|
|
<2 1 20>,
|
|
<2 2 10>,
|
|
<2 3 20>,
|
|
<3 0 20>,
|
|
<3 1 40>,
|
|
<3 2 20>,
|
|
<3 3 10>;
|
|
};
|
|
|
|
==============================================================================
|
|
4 - Empty memory nodes
|
|
==============================================================================
|
|
|
|
Empty memory nodes, in which no memory resides, are allowed. There are no
|
|
device nodes for these empty memory nodes. However, the NUMA node IDs and
|
|
distance maps are still valid and memory may be added into them through
|
|
hotplug afterwards.
|
|
|
|
Example:
|
|
|
|
memory@0 {
|
|
device_type = "memory";
|
|
reg = <0x0 0x0 0x0 0x80000000>;
|
|
numa-node-id = <0>;
|
|
};
|
|
|
|
memory@80000000 {
|
|
device_type = "memory";
|
|
reg = <0x0 0x80000000 0x0 0x80000000>;
|
|
numa-node-id = <1>;
|
|
};
|
|
|
|
/* Empty memory nodes 2 and 3 */
|
|
distance-map {
|
|
compatible = "numa-distance-map-v1";
|
|
distance-matrix = <0 0 10>,
|
|
<0 1 20>,
|
|
<0 2 40>,
|
|
<0 3 20>,
|
|
<1 0 20>,
|
|
<1 1 10>,
|
|
<1 2 20>,
|
|
<1 3 40>,
|
|
<2 0 40>,
|
|
<2 1 20>,
|
|
<2 2 10>,
|
|
<2 3 20>,
|
|
<3 0 20>,
|
|
<3 1 40>,
|
|
<3 2 20>,
|
|
<3 3 10>;
|
|
};
|
|
|
|
==============================================================================
|
|
5 - Example dts
|
|
==============================================================================
|
|
|
|
A dual-socket system consisting of 2 boards connected through a CCN bus, with
|
|
each board having one socket/SoC with 8 CPUs, memory and a PCI bus.
|
|
|
|
memory@c00000 {
|
|
device_type = "memory";
|
|
reg = <0x0 0xc00000 0x0 0x80000000>;
|
|
/* node 0 */
|
|
numa-node-id = <0>;
|
|
};
|
|
|
|
memory@10000000000 {
|
|
device_type = "memory";
|
|
reg = <0x100 0x0 0x0 0x80000000>;
|
|
/* node 1 */
|
|
numa-node-id = <1>;
|
|
};
|
|
|
|
cpus {
|
|
#address-cells = <2>;
|
|
#size-cells = <0>;
|
|
|
|
cpu@0 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x0>;
|
|
enable-method = "psci";
|
|
/* node 0 */
|
|
numa-node-id = <0>;
|
|
};
|
|
cpu@1 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x1>;
|
|
enable-method = "psci";
|
|
numa-node-id = <0>;
|
|
};
|
|
cpu@2 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x2>;
|
|
enable-method = "psci";
|
|
numa-node-id = <0>;
|
|
};
|
|
cpu@3 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x3>;
|
|
enable-method = "psci";
|
|
numa-node-id = <0>;
|
|
};
|
|
cpu@4 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x4>;
|
|
enable-method = "psci";
|
|
numa-node-id = <0>;
|
|
};
|
|
cpu@5 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x5>;
|
|
enable-method = "psci";
|
|
numa-node-id = <0>;
|
|
};
|
|
cpu@6 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x6>;
|
|
enable-method = "psci";
|
|
numa-node-id = <0>;
|
|
};
|
|
cpu@7 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x7>;
|
|
enable-method = "psci";
|
|
numa-node-id = <0>;
|
|
};
|
|
cpu@8 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x8>;
|
|
enable-method = "psci";
|
|
/* node 1 */
|
|
numa-node-id = <1>;
|
|
};
|
|
cpu@9 {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0x9>;
|
|
enable-method = "psci";
|
|
numa-node-id = <1>;
|
|
};
|
|
cpu@a {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0xa>;
|
|
enable-method = "psci";
|
|
numa-node-id = <1>;
|
|
};
|
|
cpu@b {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0xb>;
|
|
enable-method = "psci";
|
|
numa-node-id = <1>;
|
|
};
|
|
cpu@c {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0xc>;
|
|
enable-method = "psci";
|
|
numa-node-id = <1>;
|
|
};
|
|
cpu@d {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0xd>;
|
|
enable-method = "psci";
|
|
numa-node-id = <1>;
|
|
};
|
|
cpu@e {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0xe>;
|
|
enable-method = "psci";
|
|
numa-node-id = <1>;
|
|
};
|
|
cpu@f {
|
|
device_type = "cpu";
|
|
compatible = "arm,armv8";
|
|
reg = <0x0 0xf>;
|
|
enable-method = "psci";
|
|
numa-node-id = <1>;
|
|
};
|
|
};
|
|
|
|
pcie0: pcie0@848000000000 {
|
|
compatible = "arm,armv8";
|
|
device_type = "pci";
|
|
bus-range = <0 255>;
|
|
#size-cells = <2>;
|
|
#address-cells = <3>;
|
|
reg = <0x8480 0x00000000 0 0x10000000>; /* Configuration space */
|
|
ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>;
|
|
/* node 0 */
|
|
numa-node-id = <0>;
|
|
};
|
|
|
|
pcie1: pcie1@948000000000 {
|
|
compatible = "arm,armv8";
|
|
device_type = "pci";
|
|
bus-range = <0 255>;
|
|
#size-cells = <2>;
|
|
#address-cells = <3>;
|
|
reg = <0x9480 0x00000000 0 0x10000000>; /* Configuration space */
|
|
ranges = <0x03000000 0x9010 0x00000000 0x9010 0x00000000 0x70 0x00000000>;
|
|
/* node 1 */
|
|
numa-node-id = <1>;
|
|
};
|
|
|
|
distance-map {
|
|
compatible = "numa-distance-map-v1";
|
|
distance-matrix = <0 0 10>,
|
|
<0 1 20>,
|
|
<1 1 10>;
|
|
};
|