scx_layered: Implement sticky modulation optimization #1690

Draft · wants to merge 1 commit into base: main
6 changes: 6 additions & 0 deletions scheds/rust/scx_layered/src/bpf/intf.h
@@ -202,6 +202,9 @@ struct cpu_ctx {
        u32 gn_layer_order[MAX_LAYERS]; /* grouped non-preempt */

        struct cpu_prox_map prox_map;

        u64 sticky_mod_end_time_ns;
        u64 sticky_mod_pred_pct;
};

struct llc_prox_map {
@@ -332,6 +335,9 @@ struct layer {

        char name[MAX_LAYER_NAME];
        bool is_protected;

        u64 sticky_mod_min_ns;
        u64 sticky_mod_pred_pct;
};

struct scx_cmd {
105 changes: 105 additions & 0 deletions scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -52,6 +52,7 @@ const volatile u64 min_open_layer_disallow_preempt_after_ns;
const volatile u64 lo_fb_wait_ns = 5000000; /* !0 for veristat */
const volatile u32 lo_fb_share_ppk = 128; /* !0 for veristat */
const volatile bool percpu_kthread_preempt = true;
int active_sticky_mod = 0;

/* Flag to enable or disable antistall feature */
const volatile bool enable_antistall = true;
@@ -499,6 +500,11 @@ struct task_ctx {
        u32 qrt_llc_id;

        char join_layer[SCXCMD_COMLEN];

#define STICKY_MOD_NR_BUCKETS 8
        u64 sticky_mod_buckets[STICKY_MOD_NR_BUCKETS];
        u64 sticky_mod_nr_cnt;
        u64 sticky_mod_start_ns;
};

struct {
@@ -871,6 +877,47 @@ s32 pick_idle_big_little(struct layer *layer, struct task_ctx *taskc,
        return cpu;
}

static __always_inline
s32 pick_sticky_mod_cpu(struct llc_ctx *llc, struct layer *layer, s32 prev_cpu)
{
        u64 time = bpf_ktime_get_ns();
        const struct cpumask *cpumask;
        struct cpu_ctx *cpu_ctx;
        s32 cpu = -1;
        int i;

        if (!active_sticky_mod)
                return cpu;

        cpu_ctx = lookup_cpu_ctx(prev_cpu);
Contributor: Nit: Factoring out into a small inline function can avoid all the gotos.
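For illustration, a rough sketch of that suggestion; the helper name cpu_sticky_mod_ok is made up here and is not part of the patch:

/* Hypothetical helper: true if @cpu is expected to free up soon enough and
 * predictably enough for @layer, mirroring the checks in the patch. */
static __always_inline bool cpu_sticky_mod_ok(s32 cpu, struct layer *layer, u64 now)
{
        struct cpu_ctx *cpu_ctx = lookup_cpu_ctx(cpu);

        if (!cpu_ctx)
                return false;
        if (cpu_ctx->sticky_mod_pred_pct < layer->sticky_mod_pred_pct)
                return false;
        if (cpu_ctx->sticky_mod_end_time_ns - now > layer->sticky_mod_min_ns)
                return false;
        return true;
}

pick_sticky_mod_cpu() could then return prev_cpu when cpu_sticky_mod_ok(prev_cpu, layer, time) holds and reuse the same test in the LLC loop, dropping the llc:/out: labels.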

        if (!cpu_ctx)
                goto llc;
        if (cpu_ctx->sticky_mod_pred_pct < layer->sticky_mod_pred_pct)
                goto llc;
        if (cpu_ctx->sticky_mod_end_time_ns - time > layer->sticky_mod_min_ns)
                goto llc;
        return prev_cpu;
llc:
        if (!(cpumask = cast_mask(llc->cpumask)))
                goto out;
        bpf_for(i, 0, nr_possible_cpus) {
Contributor: (Comment: I'm surprised we don't have an FFS based foreach cpu in cpumask primitive)

Contributor Author: Yeah, if there's a better way to do this, I'm all ears. It's suboptimal, especially on machines with a lot of CPUs. Even on Bergamo, we're iterating over 88 CPUs for every 8 we want to test.

Contributor Author: One thing that comes to mind is storing start and end CPUs in llc_ctx, but that requires the assignment to be sequential.

Contributor: layered stores iteration indices in per-cpu/llc arrays. It's a bit more setup work but overall not that bad. But yeah, bit-wise iterators would be great.
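As a rough sketch of the sequential-range idea above, assuming hypothetical cpu_beg/cpu_end fields were added to llc_ctx (not part of this patch, and only valid if each LLC owns a contiguous CPU id range), the loop could shrink to:

        /* Hypothetical: llc->cpu_beg/cpu_end bound the LLC's contiguous CPU ids. */
        bpf_for(i, llc->cpu_beg, llc->cpu_end + 1) {
                if (i == prev_cpu)
                        continue;
                /* same per-CPU sticky_mod checks as in the loop body just below */
        }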

                if (i == prev_cpu)
                        continue;
                if (!bpf_cpumask_test_cpu(i, cpumask))
                        continue;
                if (!(cpu_ctx = lookup_cpu_ctx(i)))
                        continue;
                if (cpu_ctx->sticky_mod_pred_pct < layer->sticky_mod_pred_pct)
                        continue;
                if (cpu_ctx->sticky_mod_end_time_ns - time > layer->sticky_mod_min_ns)
                        continue;
                cpu = i;
                break;
        }
out:
        return cpu;
}

static __always_inline
s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
struct cpu_ctx *cpuc, struct task_ctx *taskc, struct layer *layer,
@@ -987,6 +1034,9 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
                        cpu = -1;
                        goto out_put;
                }

                if ((cpu = pick_sticky_mod_cpu(prev_llcc, layer, prev_cpu)) >= 0)
                        goto out_put;
        }

/*
@@ -1195,6 +1245,55 @@ static void layer_kick_idle_cpu(struct layer *layer)
        scx_bpf_put_idle_cpumask(idle_smtmask);
}

SEC("tp_btf/sched_switch")
Contributor: What is the overhead of the extra probe? Both in terms of having an extra probe and in terms of the code itself. Can we roll this into our starting/stopping methods?

Contributor Author: The probe itself should be pretty fast, but I think longer term hooking into starting/stopping methods is the better way, so I will do that.
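A rough sketch of how that could look, assuming the bookkeeping moves into the existing layered_running/layered_stopping callbacks; update_sticky_mod_hist() is a hypothetical helper wrapping the bucket logic from the probe below and is not part of this patch:

void BPF_STRUCT_OPS(layered_running, struct task_struct *p)
{
        struct task_ctx *taskc;

        /* ... existing layered_running body ... */
        if (active_sticky_mod && (taskc = lookup_task_ctx_may_fail(p)))
                taskc->sticky_mod_start_ns = bpf_ktime_get_ns();
}

void BPF_STRUCT_OPS(layered_stopping, struct task_struct *p, bool runnable)
{
        struct task_ctx *taskc;

        /* ... existing layered_stopping body ... */
        if (active_sticky_mod && (taskc = lookup_task_ctx_may_fail(p)))
                update_sticky_mod_hist(taskc); /* hypothetical: bucket update + per-CPU prediction */
}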

int BPF_PROG(layered_sched_switch, bool ignore, struct task_struct *prev, struct task_struct *next)
{
        u64 time = bpf_ktime_get_ns();
        u64 duration = time, max = 0;
        u32 beg = 0, end = 50000, i;
        struct task_ctx *pc, *nc;
        struct cpu_ctx *c;
        u32 max_i = 0;

        if (!active_sticky_mod)
                return 0;

        if (!(pc = lookup_task_ctx_may_fail(prev)))
                goto next;

        duration -= pc->sticky_mod_start_ns;
        duration /= 1000;

        pc->sticky_mod_nr_cnt++;

        for (i = 0; i < STICKY_MOD_NR_BUCKETS; i++) {
                u64 cnt = pc->sticky_mod_buckets[i];

                if (duration >= beg && duration <= end) {
                        pc->sticky_mod_buckets[i]++;
                        cnt++;
                }
                if (max < cnt) {
                        max = cnt;
                        max_i = i;
                }
                beg += 50000;
                end += 50000;
                if (i == STICKY_MOD_NR_BUCKETS - 2)
                        end = -1;
        }

        if (!(c = lookup_cpu_ctx(-1)))
                goto next;
        c->sticky_mod_end_time_ns = (max_i + 1) * 50000;
        c->sticky_mod_pred_pct = ((max * 100) / pc->sticky_mod_nr_cnt);
next:
        if (!(nc = lookup_task_ctx_may_fail(next)))
                return 0;
        nc->sticky_mod_start_ns = time;
        return 0;
}
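For illustration only, a worked example of what the probe above ends up publishing; the numbers are hypothetical, not from the patch:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the bucket math in layered_sched_switch: say a task was switched out
 * 100 times and 92 of those runs fell into the first 0-50000us bucket. */
int main(void)
{
        uint64_t nr_cnt = 100, max = 92, max_i = 0;

        /* upper edge of the most-hit bucket -> sticky_mod_end_time_ns */
        uint64_t end_time = (max_i + 1) * 50000;        /* 50000 */
        /* share of samples in that bucket -> sticky_mod_pred_pct */
        uint64_t pred_pct = (max * 100) / nr_cnt;       /* 92 */

        printf("end_time=%llu pred_pct=%llu\n",
               (unsigned long long)end_time, (unsigned long long)pred_pct);
        return 0;
}

A layer configured with sticky_mod_pred_pct at or below 92 would then treat that CPU as a sticky candidate, subject to the sticky_mod_min_ns check in pick_sticky_mod_cpu().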

void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
{
        struct cpu_ctx *cpuc, *task_cpuc;
@@ -1718,6 +1817,9 @@ static __always_inline bool try_consume_layer(u32 layer_id, struct cpu_ctx *cpuc
                                xllc_mig_skipped = true;
                                continue;
                        }

                        if (pick_sticky_mod_cpu(remote_llcc, layer, -1) >= 0)
                                continue;
                }

                if (scx_bpf_dsq_move_to_local(layer_dsq_id(layer_id, *llc_idp)))
@@ -3174,6 +3276,9 @@ static s32 init_layer(int layer_id)
                return ret;
        }

        if (layer->sticky_mod_min_ns || layer->sticky_mod_pred_pct)
                active_sticky_mod++;

        return 0;
}

4 changes: 4 additions & 0 deletions scheds/rust/scx_layered/src/config.rs
@@ -122,6 +122,10 @@ pub struct LayerCommon {
    pub nodes: Vec<usize>,
    #[serde(default)]
    pub llcs: Vec<usize>,
    #[serde(default)]
    pub sticky_mod_min_us: f64,
    #[serde(default)]
    pub sticky_mod_pred_pct: f64,
}

#[derive(Clone, Debug, Serialize, Deserialize)]
23 changes: 23 additions & 0 deletions scheds/rust/scx_layered/src/main.rs
@@ -123,6 +123,8 @@ lazy_static! {
perf: 1024,
nodes: vec![],
llcs: vec![],
sticky_mod_min_us: 0.0,
sticky_mod_pred_pct: 0.0,
},
},
},
@@ -154,6 +156,8 @@ lazy_static! {
idle_resume_us: None,
nodes: vec![],
llcs: vec![],
sticky_mod_min_us: 0.0,
sticky_mod_pred_pct: 0.0,
},
},
},
@@ -189,6 +193,8 @@ lazy_static! {
idle_resume_us: None,
nodes: vec![],
llcs: vec![],
sticky_mod_min_us: 0.0,
sticky_mod_pred_pct: 0.0,
},
},
},
@@ -221,6 +227,8 @@ lazy_static! {
idle_resume_us: None,
nodes: vec![],
llcs: vec![],
sticky_mod_min_us: 0.0,
sticky_mod_pred_pct: 0.0,
},
},
},
@@ -428,6 +436,17 @@ lazy_static! {
/// the nodes value is set the cpuset of LLCs will be or'ed with the nodes
/// config.
///
/// - sticky_mod_min_us: Skip cross-CPU migration if the previous CPU (or one
/// of the CPUs in the previous LLC) is likely to become available for
/// execution sooner than this threshold.
///
/// - sticky_mod_pred_pct: The percentage threshold used to decide whether to
/// stick to the previous CPU, or to one of the CPUs in the previous LLC that
/// is opening up. It is compared against the percentage of times the process
/// stayed in its most common bucket of execution time, i.e. how confidently
/// its runtime can be predicted. E.g. 90 means that only processes whose
/// runtime is predictable with 90% accuracy or more are chosen for stickiness
/// modulation.
///
///
/// Similar to matches, adding new policies and extending existing ones
/// should be relatively straightforward.
@@ -1318,6 +1337,8 @@ impl<'a> Scheduler<'a> {
disallow_open_after_us,
disallow_preempt_after_us,
xllc_mig_min_us,
sticky_mod_min_us,
sticky_mod_pred_pct,
..
} = spec.kind.common();

@@ -1359,6 +1380,8 @@ impl<'a> Scheduler<'a> {
}
layer.llc_mask |= llcmask_from_llcs(&topo_node.llcs) as u64;
}
layer.sticky_mod_min_ns = (sticky_mod_min_us * 1000.0) as u64;
layer.sticky_mod_pred_pct = sticky_mod_pred_pct.clamp(0.0, 100.0) as u64;
}

layer.is_protected.write(match spec.kind {