Skip to content

Commit 14fa3d1

Browse files
Florian Westphalgregkh
authored andcommitted
netfilter: nf_tables: avoid chain re-validation if possible
[ Upstream commit 8e1a1bc ] Hamza Mahfooz reports cpu soft lock-ups in nft_chain_validate(): watchdog: BUG: soft lockup - CPU#1 stuck for 27s! [iptables-nft-re:37547] [..] RIP: 0010:nft_chain_validate+0xcb/0x110 [nf_tables] [..] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_table_validate+0x6b/0xb0 [nf_tables] nf_tables_validate+0x8b/0xa0 [nf_tables] nf_tables_commit+0x1df/0x1eb0 [nf_tables] [..] Currently nf_tables will traverse the entire table (chain graph), starting from the entry points (base chains), exploring all possible paths (chain jumps). But there are cases where we could avoid revalidation. Consider: 1 input -> j2 -> j3 2 input -> j2 -> j3 3 input -> j1 -> j2 -> j3 Then the second rule does not need to revalidate j2, and, by extension j3, because this was already checked during validation of the first rule. We need to validate it only for rule 3. This is needed because chain loop detection also ensures we do not exceed the jump stack: Just because we know that j2 is cycle free, its last jump might now exceed the allowed stack size. We also need to update all reachable chains with the new largest observed call depth. Care has to be taken to revalidate even if the chain depth won't be an issue: chain validation also ensures that expressions are not called from invalid base chains. For example, the masquerade expression can only be called from NAT postrouting base chains. Therefore we also need to keep record of the base chain context (type, hooknum) and revalidate if the chain becomes reachable from a different hook location. Reported-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> Closes: https://lore.kernel.org/netfilter-devel/20251118221735.GA5477@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/ Tested-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent feb28b6 commit 14fa3d1

2 files changed

Lines changed: 91 additions & 12 deletions

File tree

include/net/netfilter/nf_tables.h

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,6 +1093,29 @@ struct nft_rule_blob {
10931093
__attribute__((aligned(__alignof__(struct nft_rule_dp))));
10941094
};
10951095

1096+
enum nft_chain_types {
1097+
NFT_CHAIN_T_DEFAULT = 0,
1098+
NFT_CHAIN_T_ROUTE,
1099+
NFT_CHAIN_T_NAT,
1100+
NFT_CHAIN_T_MAX
1101+
};
1102+
1103+
/**
1104+
* struct nft_chain_validate_state - validation state
1105+
*
1106+
* If a chain is encountered again during table validation it is
1107+
* possible to avoid revalidation provided the calling context is
1108+
* compatible. This structure stores relevant calling context of
1109+
* previous validations.
1110+
*
1111+
* @hook_mask: the hook numbers and locations the chain is linked to
1112+
* @depth: the deepest call chain level the chain is linked to
1113+
*/
1114+
struct nft_chain_validate_state {
1115+
u8 hook_mask[NFT_CHAIN_T_MAX];
1116+
u8 depth;
1117+
};
1118+
10961119
/**
10971120
* struct nft_chain - nf_tables chain
10981121
*
@@ -1111,6 +1134,7 @@ struct nft_rule_blob {
11111134
* @udlen: user data length
11121135
* @udata: user data in the chain
11131136
* @blob_next: rule blob pointer to the next in the chain
1137+
* @vstate: validation state
11141138
*/
11151139
struct nft_chain {
11161140
struct nft_rule_blob __rcu *blob_gen_0;
@@ -1130,23 +1154,17 @@ struct nft_chain {
11301154

11311155
/* Only used during control plane commit phase: */
11321156
struct nft_rule_blob *blob_next;
1157+
struct nft_chain_validate_state vstate;
11331158
};
11341159

1135-
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
1160+
int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain);
11361161
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
11371162
const struct nft_set_iter *iter,
11381163
struct nft_elem_priv *elem_priv);
11391164
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
11401165
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
11411166
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
11421167

1143-
enum nft_chain_types {
1144-
NFT_CHAIN_T_DEFAULT = 0,
1145-
NFT_CHAIN_T_ROUTE,
1146-
NFT_CHAIN_T_NAT,
1147-
NFT_CHAIN_T_MAX
1148-
};
1149-
11501168
/**
11511169
* struct nft_chain_type - nf_tables chain type info
11521170
*

net/netfilter/nf_tables_api.c

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
120120

121121
table->validate_state = new_validate_state;
122122
}
123+
124+
static bool nft_chain_vstate_valid(const struct nft_ctx *ctx,
125+
const struct nft_chain *chain)
126+
{
127+
const struct nft_base_chain *base_chain;
128+
enum nft_chain_types type;
129+
u8 hooknum;
130+
131+
if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain)))
132+
return false;
133+
134+
base_chain = nft_base_chain(ctx->chain);
135+
hooknum = base_chain->ops.hooknum;
136+
type = base_chain->type->type;
137+
138+
/* chain is already validated for this call depth */
139+
if (chain->vstate.depth >= ctx->level &&
140+
chain->vstate.hook_mask[type] & BIT(hooknum))
141+
return true;
142+
143+
return false;
144+
}
145+
123146
static void nf_tables_trans_destroy_work(struct work_struct *w);
124147

125148
static void nft_trans_gc_work(struct work_struct *work);
@@ -3898,6 +3921,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
38983921
nf_tables_rule_destroy(ctx, rule);
38993922
}
39003923

3924+
static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain)
3925+
{
3926+
const struct nft_base_chain *base_chain;
3927+
enum nft_chain_types type;
3928+
u8 hooknum;
3929+
3930+
/* ctx->chain must hold the calling base chain. */
3931+
if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) {
3932+
memset(&chain->vstate, 0, sizeof(chain->vstate));
3933+
return;
3934+
}
3935+
3936+
base_chain = nft_base_chain(ctx->chain);
3937+
hooknum = base_chain->ops.hooknum;
3938+
type = base_chain->type->type;
3939+
3940+
BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX);
3941+
3942+
chain->vstate.hook_mask[type] |= BIT(hooknum);
3943+
if (chain->vstate.depth < ctx->level)
3944+
chain->vstate.depth = ctx->level;
3945+
}
3946+
39013947
/** nft_chain_validate - loop detection and hook validation
39023948
*
39033949
* @ctx: context containing call depth and base chain
@@ -3907,15 +3953,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
39073953
* and set lookups until either the jump limit is hit or all reachable
39083954
* chains have been validated.
39093955
*/
3910-
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
3956+
int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain)
39113957
{
39123958
struct nft_expr *expr, *last;
39133959
struct nft_rule *rule;
39143960
int err;
39153961

3962+
BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255);
39163963
if (ctx->level == NFT_JUMP_STACK_SIZE)
39173964
return -EMLINK;
39183965

3966+
if (ctx->level > 0) {
3967+
/* jumps to base chains are not allowed. */
3968+
if (nft_is_base_chain(chain))
3969+
return -ELOOP;
3970+
3971+
if (nft_chain_vstate_valid(ctx, chain))
3972+
return 0;
3973+
}
3974+
39193975
list_for_each_entry(rule, &chain->rules, list) {
39203976
if (fatal_signal_pending(current))
39213977
return -EINTR;
@@ -3936,6 +3992,7 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
39363992
}
39373993
}
39383994

3995+
nft_chain_vstate_update(ctx, chain);
39393996
return 0;
39403997
}
39413998
EXPORT_SYMBOL_GPL(nft_chain_validate);
@@ -3947,7 +4004,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
39474004
.net = net,
39484005
.family = table->family,
39494006
};
3950-
int err;
4007+
int err = 0;
39514008

39524009
list_for_each_entry(chain, &table->chains, list) {
39534010
if (!nft_is_base_chain(chain))
@@ -3956,12 +4013,16 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
39564013
ctx.chain = chain;
39574014
err = nft_chain_validate(&ctx, chain);
39584015
if (err < 0)
3959-
return err;
4016+
goto err;
39604017

39614018
cond_resched();
39624019
}
39634020

3964-
return 0;
4021+
err:
4022+
list_for_each_entry(chain, &table->chains, list)
4023+
memset(&chain->vstate, 0, sizeof(chain->vstate));
4024+
4025+
return err;
39654026
}
39664027

39674028
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,

0 commit comments

Comments
 (0)