From 18f4b75f8ba2dee057ae44a4ca6dbaa7b54111da Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 6 Mar 2023 13:43:26 +0100 Subject: [PATCH 01/14] python: enable summaries from model This requires a change to the shared interface: Making `getNodeFromPath` public. This because Python is doing its own thing and identifying call-backs. --- .../python/dataflow/new/FlowSummary.qll | 70 +++++++++---------- .../data/internal/ApiGraphModels.qll | 9 +++ 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll index 5e82700bd0e3..0fac6bfaedd1 100644 --- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll +++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll @@ -90,39 +90,37 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari } class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; -// // This gives access to getNodeFromPath, which is not constrained to `CallNode`s -// // as `resolvedSummaryBase` is. -// private import semmle.python.frameworks.data.internal.ApiGraphModels as AGM -// -// private class SummarizedCallableFromModel extends SummarizedCallable { -// string package; -// string type; -// string path; -// SummarizedCallableFromModel() { -// ModelOutput::relevantSummaryModel(package, type, path, _, _, _) and -// this = package + ";" + type + ";" + path -// } -// override CallCfgNode getACall() { -// exists(API::CallNode base | -// ModelOutput::resolvedSummaryBase(package, type, path, base) and -// result = base.getACall() -// ) -// } -// override ArgumentNode getACallback() { -// exists(API::Node base | -// base = AGM::getNodeFromPath(package, type, path) and -// result = base.getAValueReachableFromSource() -// ) -// } -// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { -// exists(string kind | -// ModelOutput::relevantSummaryModel(package, type, path, input, output, kind) -// | -// kind = "value" and -// preservesValue = true -// or -// kind = "taint" and -// preservesValue = false -// ) -// } -// } + +private class SummarizedCallableFromModel extends SummarizedCallable { + string type; + string path; + + SummarizedCallableFromModel() { + ModelOutput::relevantSummaryModel(type, path, _, _, _) and + this = type + ";" + path + } + + override CallCfgNode getACall() { + exists(API::CallNode base | + ModelOutput::resolvedSummaryBase(type, path, base) and + result = base.getACall() + ) + } + + override ArgumentNode getACallback() { + exists(API::Node base | + ModelOutput::resolvedSummaryRefBase(type, path, base) and + result = base.getAValueReachableFromSource() + ) + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | + kind = "value" and + preservesValue = true + or + kind = "taint" and + preservesValue = false + ) + } +} diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 227f4ea22fbb..6688ba36cd09 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -643,6 +643,15 @@ module ModelOutput { baseNode = getInvocationFromPath(type, path) } + /** + * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row. + */ + cached + predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { + summaryModel(type, path, _, _, _) and + baseNode = getNodeFromPath(type, path) + } + /** * Holds if `node` is seen as an instance of `type` due to a type definition * contributed by a CSV model. From 3cf9e3e69231a7d42d75cd2c8dca670bb20b6f54 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 20 Mar 2023 14:35:18 +0100 Subject: [PATCH 02/14] Py/js/ruby: sync files --- .../frameworks/data/internal/ApiGraphModels.qll | 9 +++++++++ .../ruby/frameworks/data/internal/ApiGraphModels.qll | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index 227f4ea22fbb..6688ba36cd09 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -643,6 +643,15 @@ module ModelOutput { baseNode = getInvocationFromPath(type, path) } + /** + * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row. + */ + cached + predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { + summaryModel(type, path, _, _, _) and + baseNode = getNodeFromPath(type, path) + } + /** * Holds if `node` is seen as an instance of `type` due to a type definition * contributed by a CSV model. diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll index 227f4ea22fbb..6688ba36cd09 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll @@ -643,6 +643,15 @@ module ModelOutput { baseNode = getInvocationFromPath(type, path) } + /** + * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row. + */ + cached + predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { + summaryModel(type, path, _, _, _) and + baseNode = getNodeFromPath(type, path) + } + /** * Holds if `node` is seen as an instance of `type` due to a type definition * contributed by a CSV model. From 6554e804ddc9f5a6faaa2a72b9f9c9e68fdeff79 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 24 Mar 2023 09:48:31 +0100 Subject: [PATCH 03/14] python: add test for model summaries (but no summaries yet) --- .../NormalTaintTrackingTest.expected | 2 + .../NormalTaintTrackingTest.ql | 3 + .../model-summaries/TestSummaries.qll | 8 +++ .../model-summaries/model_summaries.py | 70 +++++++++++++++++++ 4 files changed, 83 insertions(+) create mode 100644 python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.expected create mode 100644 python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.ql create mode 100644 python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll create mode 100644 python/ql/test/experimental/dataflow/model-summaries/model_summaries.py diff --git a/python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.expected b/python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.expected new file mode 100644 index 000000000000..3875da4e143c --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.expected @@ -0,0 +1,2 @@ +missingAnnotationOnSink +failures diff --git a/python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.ql b/python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.ql new file mode 100644 index 000000000000..afb44b6b2edb --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.ql @@ -0,0 +1,3 @@ +import python +private import TestSummaries +import experimental.dataflow.TestUtil.NormalTaintTrackingTest diff --git a/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll new file mode 100644 index 000000000000..5069c406a605 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll @@ -0,0 +1,8 @@ +private import python +private import semmle.python.dataflow.new.FlowSummary +private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.ApiGraphs + +private class StepsFromModel extends ModelInput::SummaryModelCsv { + override predicate row(string row) { none() } +} diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py new file mode 100644 index 000000000000..498d445efdfe --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py @@ -0,0 +1,70 @@ + +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname((__file__)))) +from testlib import expects + +# These are defined so that we can evaluate the test code. +NONSOURCE = "not a source" +SOURCE = "source" + + +def is_source(x): + return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j + + +def SINK(x): + if is_source(x): + print("OK") + else: + print("Unexpected flow", x) + + +def SINK_F(x): + if is_source(x): + print("Unexpected flow", x) + else: + print("OK") + + +from Foo import identity + +# Simple summary +tainted = identity(SOURCE) +SINK(tainted) # $ MISSING: flow="SOURCE, l:-1 -> tainted" + +# Lambda summary +tainted_lambda = apply_lambda(lambda x: x + 1, SOURCE) +SINK(tainted_lambda) # $ MISSING: flow="SOURCE, l:-1 -> tainted_lambda" + +# A lambda that breaks the flow +untainted_lambda = apply_lambda(lambda x: 1, SOURCE) +SINK_F(untainted_lambda) + +# Collection summaries +tainted_list = my_reversed([SOURCE]) +SINK(tainted_list[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_list[0]" + +# Complex summaries +def add_colon(x): + return x + ":" + +tainted_mapped = list_map(add_colon, [SOURCE]) +SINK(tainted_mapped[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped[0]" + +def explicit_identity(x): + return x + +tainted_mapped_explicit = list_map(explicit_identity, [SOURCE]) +SINK(tainted_mapped_explicit[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" + +tainted_mapped_summary = list_map(identity, [SOURCE]) +SINK(tainted_mapped_summary[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" + +tainted_list = append_to_list([], SOURCE) +SINK(tainted_list[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_list[0]" + +from json import my_loads as json_loads +tainted_resultlist = json_loads(SOURCE) +SINK(tainted_resultlist[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_resultlist[0]" From 229641070fd65a891fd2055fcea595dca143246f Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Sun, 18 Jun 2023 22:01:47 +0200 Subject: [PATCH 04/14] python: rename summaries --- .../model-summaries/model_summaries.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py index 498d445efdfe..23cc4990aba8 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py +++ b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py @@ -28,43 +28,43 @@ def SINK_F(x): print("OK") -from Foo import identity +from Foo import MS_identity # Simple summary -tainted = identity(SOURCE) +tainted = MS_identity(SOURCE) SINK(tainted) # $ MISSING: flow="SOURCE, l:-1 -> tainted" # Lambda summary -tainted_lambda = apply_lambda(lambda x: x + 1, SOURCE) +tainted_lambda = MS_apply_lambda(lambda x: x + 1, SOURCE) SINK(tainted_lambda) # $ MISSING: flow="SOURCE, l:-1 -> tainted_lambda" # A lambda that breaks the flow -untainted_lambda = apply_lambda(lambda x: 1, SOURCE) +untainted_lambda = MS_apply_lambda(lambda x: 1, SOURCE) SINK_F(untainted_lambda) # Collection summaries -tainted_list = my_reversed([SOURCE]) +tainted_list = MS_reversed([SOURCE]) SINK(tainted_list[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_list[0]" # Complex summaries def add_colon(x): return x + ":" -tainted_mapped = list_map(add_colon, [SOURCE]) +tainted_mapped = MS_list_map(add_colon, [SOURCE]) SINK(tainted_mapped[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped[0]" def explicit_identity(x): return x -tainted_mapped_explicit = list_map(explicit_identity, [SOURCE]) +tainted_mapped_explicit = MS_list_map(explicit_identity, [SOURCE]) SINK(tainted_mapped_explicit[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" -tainted_mapped_summary = list_map(identity, [SOURCE]) +tainted_mapped_summary = MS_list_map(MS_identity, [SOURCE]) SINK(tainted_mapped_summary[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" -tainted_list = append_to_list([], SOURCE) +tainted_list = MS_append_to_list([], SOURCE) SINK(tainted_list[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_list[0]" -from json import my_loads as json_loads +from json import MS_loads as json_loads tainted_resultlist = json_loads(SOURCE) SINK(tainted_resultlist[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_resultlist[0]" From eb3c33dfe2037924cd78693a4dfd7b8debc9c023 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 19 Jun 2023 11:41:06 +0200 Subject: [PATCH 05/14] python: remove erronous `getACall()` `base` is already the `CallNode` we want. --- python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll index 0fac6bfaedd1..8b80e13d06de 100644 --- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll +++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll @@ -100,12 +100,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable { this = type + ";" + path } - override CallCfgNode getACall() { - exists(API::CallNode base | - ModelOutput::resolvedSummaryBase(type, path, base) and - result = base.getACall() - ) - } + override CallCfgNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) } override ArgumentNode getACallback() { exists(API::Node base | From e111a19524421bb97a1b2342e6714d4c34cec0de Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 19 Jun 2023 11:41:41 +0200 Subject: [PATCH 06/14] python: split tests into taint and value and add summaries --- .../model-summaries/TestSummaries.qll | 8 --- .../NormalDataflowTest.expected} | 0 .../dataflow/NormalDataflowTest.ql | 3 + .../dataflow/TestSummaries.qll | 20 ++++++ .../dataflow/model_summaries.py | 69 +++++++++++++++++++ .../taint/NormalTaintTrackingTest.expected | 2 + .../{ => taint}/NormalTaintTrackingTest.ql | 0 .../model-summaries/taint/TestSummaries.qll | 19 +++++ .../model_summaries_taint.py} | 20 +++--- 9 files changed, 123 insertions(+), 18 deletions(-) delete mode 100644 python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll rename python/ql/test/experimental/dataflow/model-summaries/{NormalTaintTrackingTest.expected => dataflow/NormalDataflowTest.expected} (100%) create mode 100644 python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.ql create mode 100644 python/ql/test/experimental/dataflow/model-summaries/dataflow/TestSummaries.qll create mode 100644 python/ql/test/experimental/dataflow/model-summaries/dataflow/model_summaries.py create mode 100644 python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.expected rename python/ql/test/experimental/dataflow/model-summaries/{ => taint}/NormalTaintTrackingTest.ql (100%) create mode 100644 python/ql/test/experimental/dataflow/model-summaries/taint/TestSummaries.qll rename python/ql/test/experimental/dataflow/model-summaries/{model_summaries.py => taint/model_summaries_taint.py} (62%) diff --git a/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll deleted file mode 100644 index 5069c406a605..000000000000 --- a/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll +++ /dev/null @@ -1,8 +0,0 @@ -private import python -private import semmle.python.dataflow.new.FlowSummary -private import semmle.python.frameworks.data.ModelsAsData -private import semmle.python.ApiGraphs - -private class StepsFromModel extends ModelInput::SummaryModelCsv { - override predicate row(string row) { none() } -} diff --git a/python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.expected b/python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.expected similarity index 100% rename from python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.expected rename to python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.expected diff --git a/python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.ql new file mode 100644 index 000000000000..3e311335e14d --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.ql @@ -0,0 +1,3 @@ +import python +private import TestSummaries +import experimental.dataflow.TestUtil.NormalDataflowTest diff --git a/python/ql/test/experimental/dataflow/model-summaries/dataflow/TestSummaries.qll b/python/ql/test/experimental/dataflow/model-summaries/dataflow/TestSummaries.qll new file mode 100644 index 000000000000..d97702eec419 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/dataflow/TestSummaries.qll @@ -0,0 +1,20 @@ +private import python +private import semmle.python.dataflow.new.FlowSummary +private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.ApiGraphs + +private class StepsFromModel extends ModelInput::SummaryModelCsv { + override predicate row(string row) { + row = + [ + "Foo;Member[MS_identity];Argument[0];ReturnValue;value", + "Foo;Member[MS_apply_lambda];Argument[1];Argument[0].Parameter[0];value", + "Foo;Member[MS_apply_lambda];Argument[0].ReturnValue;ReturnValue;value", + "Foo;Member[MS_reversed];Argument[0].ListElement;ReturnValue.ListElement;value", + "Foo;Member[MS_list_map];Argument[1].ListElement;Argument[0].Parameter[0];value", + "Foo;Member[MS_list_map];Argument[0].ReturnValue;ReturnValue.ListElement;value", + "Foo;Member[MS_append_to_list];Argument[0].ListElement;ReturnValue.ListElement;value", + "Foo;Member[MS_append_to_list];Argument[1];ReturnValue.ListElement;value" + ] + } +} diff --git a/python/ql/test/experimental/dataflow/model-summaries/dataflow/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/dataflow/model_summaries.py new file mode 100644 index 000000000000..c81fddf44ece --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/dataflow/model_summaries.py @@ -0,0 +1,69 @@ + +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname((__file__)))) +from testlib import expects + +# These are defined so that we can evaluate the test code. +NONSOURCE = "not a source" +SOURCE = "source" + + +def is_source(x): + return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j + + +def SINK(x): + if is_source(x): + print("OK") + else: + print("Unexpected flow", x) + + +def SINK_F(x): + if is_source(x): + print("Unexpected flow", x) + else: + print("OK") + + +from Foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list + +# Simple summary +tainted = MS_identity(SOURCE) +SINK(tainted) # $ flow="SOURCE, l:-1 -> tainted" + +# Lambda summary +tainted_lambda = MS_apply_lambda(lambda x: [x], SOURCE) +SINK(tainted_lambda[0]) # $ flow="SOURCE, l:-1 -> tainted_lambda[0]" + +# A lambda that breaks the flow +untainted_lambda = MS_apply_lambda(lambda x: 1, SOURCE) +SINK_F(untainted_lambda) + +# Collection summaries +tainted_list = MS_reversed([SOURCE]) +SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" + +# Complex summaries +def box(x): + return [x] + +tainted_mapped = MS_list_map(box, [SOURCE]) +SINK(tainted_mapped[0][0]) # $ flow="SOURCE, l:-1 -> tainted_mapped[0][0]" + +def explicit_identity(x): + return x + +tainted_mapped_explicit = MS_list_map(explicit_identity, [SOURCE]) +SINK(tainted_mapped_explicit[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" + +tainted_mapped_summary = MS_list_map(MS_identity, [SOURCE]) +SINK(tainted_mapped_summary[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" + +tainted_list = MS_append_to_list([], SOURCE) +SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" + +tainted_list = MS_append_to_list([SOURCE], NONSOURCE) +SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" diff --git a/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.expected b/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.expected new file mode 100644 index 000000000000..3875da4e143c --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.expected @@ -0,0 +1,2 @@ +missingAnnotationOnSink +failures diff --git a/python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.ql b/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.ql similarity index 100% rename from python/ql/test/experimental/dataflow/model-summaries/NormalTaintTrackingTest.ql rename to python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.ql diff --git a/python/ql/test/experimental/dataflow/model-summaries/taint/TestSummaries.qll b/python/ql/test/experimental/dataflow/model-summaries/taint/TestSummaries.qll new file mode 100644 index 000000000000..57861d3c0224 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/taint/TestSummaries.qll @@ -0,0 +1,19 @@ +private import python +private import semmle.python.dataflow.new.FlowSummary +private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.ApiGraphs + +private class StepsFromModel extends ModelInput::SummaryModelCsv { + override predicate row(string row) { + row = + [ + "Foo;Member[MS_identity];Argument[0];ReturnValue;value", + "Foo;Member[MS_apply_lambda];Argument[1];Argument[0].Parameter[0];value", + "Foo;Member[MS_apply_lambda];Argument[0].ReturnValue;ReturnValue;value", + "Foo;Member[MS_reversed];Argument[0];ReturnValue;taint", + "Foo;Member[MS_list_map];Argument[1];ReturnValue;taint", + "Foo;Member[MS_append_to_list];Argument[0];ReturnValue;taint", + "json;Member[MS_loads];Argument[0];ReturnValue;taint" + ] + } +} diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/taint/model_summaries_taint.py similarity index 62% rename from python/ql/test/experimental/dataflow/model-summaries/model_summaries.py rename to python/ql/test/experimental/dataflow/model-summaries/taint/model_summaries_taint.py index 23cc4990aba8..44ca79c116e0 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py +++ b/python/ql/test/experimental/dataflow/model-summaries/taint/model_summaries_taint.py @@ -28,15 +28,15 @@ def SINK_F(x): print("OK") -from Foo import MS_identity +from Foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list # Simple summary tainted = MS_identity(SOURCE) -SINK(tainted) # $ MISSING: flow="SOURCE, l:-1 -> tainted" +SINK(tainted) # $ flow="SOURCE, l:-1 -> tainted" # Lambda summary tainted_lambda = MS_apply_lambda(lambda x: x + 1, SOURCE) -SINK(tainted_lambda) # $ MISSING: flow="SOURCE, l:-1 -> tainted_lambda" +SINK(tainted_lambda) # $ flow="SOURCE, l:-1 -> tainted_lambda" # A lambda that breaks the flow untainted_lambda = MS_apply_lambda(lambda x: 1, SOURCE) @@ -44,27 +44,27 @@ def SINK_F(x): # Collection summaries tainted_list = MS_reversed([SOURCE]) -SINK(tainted_list[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_list[0]" +SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" # Complex summaries def add_colon(x): return x + ":" tainted_mapped = MS_list_map(add_colon, [SOURCE]) -SINK(tainted_mapped[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped[0]" +SINK(tainted_mapped[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped[0]" def explicit_identity(x): return x tainted_mapped_explicit = MS_list_map(explicit_identity, [SOURCE]) -SINK(tainted_mapped_explicit[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" +SINK(tainted_mapped_explicit[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" tainted_mapped_summary = MS_list_map(MS_identity, [SOURCE]) -SINK(tainted_mapped_summary[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" +SINK(tainted_mapped_summary[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" -tainted_list = MS_append_to_list([], SOURCE) -SINK(tainted_list[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_list[0]" +tainted_list = MS_append_to_list([SOURCE], NONSOURCE) +SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" from json import MS_loads as json_loads tainted_resultlist = json_loads(SOURCE) -SINK(tainted_resultlist[0]) # $ MISSING: flow="SOURCE, l:-1 -> tainted_resultlist[0]" +SINK(tainted_resultlist[0]) # $ flow="SOURCE, l:-1 -> tainted_resultlist[0]" From 5ceac5a77110d7da6f44899ed059dfc6aa717de0 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 20 Jun 2023 11:53:31 +0200 Subject: [PATCH 07/14] python: add changenote --- .../ql/lib/change-notes/2023-06-20-summaries-from-models.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2023-06-20-summaries-from-models.md diff --git a/python/ql/lib/change-notes/2023-06-20-summaries-from-models.md b/python/ql/lib/change-notes/2023-06-20-summaries-from-models.md new file mode 100644 index 000000000000..feded1bb6c5f --- /dev/null +++ b/python/ql/lib/change-notes/2023-06-20-summaries-from-models.md @@ -0,0 +1,4 @@ +--- +category: feature +--- +* It is now possible to specify flow summaries in the format "MyPkg;Member[list_map];Argument[1].ListElement;Argument[0].Parameter[0];value" From cb2de69f5a052877f14e70e533fbdba6a91fa49d Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 20 Jun 2023 16:13:38 +0200 Subject: [PATCH 08/14] python: consolidate tests also change `Foo` -> `foo` --- .../model-summaries/InlineTaintTest.expected | 3 + .../model-summaries/InlineTaintTest.ql | 3 + .../NormalDataflowTest.expected | 0 .../{dataflow => }/NormalDataflowTest.ql | 0 .../model-summaries/TestSummaries.qll | 25 ++++ .../dataflow/TestSummaries.qll | 20 --- .../dataflow/model_summaries.py | 69 ---------- .../model-summaries/model_summaries.py | 128 ++++++++++++++++++ .../taint/NormalTaintTrackingTest.expected | 2 - .../taint/NormalTaintTrackingTest.ql | 3 - .../model-summaries/taint/TestSummaries.qll | 19 --- .../taint/model_summaries_taint.py | 70 ---------- 12 files changed, 159 insertions(+), 183 deletions(-) create mode 100644 python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected create mode 100644 python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql rename python/ql/test/experimental/dataflow/model-summaries/{dataflow => }/NormalDataflowTest.expected (100%) rename python/ql/test/experimental/dataflow/model-summaries/{dataflow => }/NormalDataflowTest.ql (100%) create mode 100644 python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll delete mode 100644 python/ql/test/experimental/dataflow/model-summaries/dataflow/TestSummaries.qll delete mode 100644 python/ql/test/experimental/dataflow/model-summaries/dataflow/model_summaries.py create mode 100644 python/ql/test/experimental/dataflow/model-summaries/model_summaries.py delete mode 100644 python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.expected delete mode 100644 python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.ql delete mode 100644 python/ql/test/experimental/dataflow/model-summaries/taint/TestSummaries.qll delete mode 100644 python/ql/test/experimental/dataflow/model-summaries/taint/model_summaries_taint.py diff --git a/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected new file mode 100644 index 000000000000..79d760d87f42 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected @@ -0,0 +1,3 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +failures diff --git a/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql new file mode 100644 index 000000000000..f7f84cb84798 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql @@ -0,0 +1,3 @@ +import python +private import TestSummaries +import experimental.meta.InlineTaintTest diff --git a/python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected similarity index 100% rename from python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.expected rename to python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected diff --git a/python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.ql similarity index 100% rename from python/ql/test/experimental/dataflow/model-summaries/dataflow/NormalDataflowTest.ql rename to python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.ql diff --git a/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll new file mode 100644 index 000000000000..5f1e0a1f90b9 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll @@ -0,0 +1,25 @@ +private import python +private import semmle.python.dataflow.new.FlowSummary +private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.ApiGraphs + +private class StepsFromModel extends ModelInput::SummaryModelCsv { + override predicate row(string row) { + row = + [ + "foo;Member[MS_identity];Argument[0];ReturnValue;value", + "foo;Member[MS_apply_lambda];Argument[1];Argument[0].Parameter[0];value", + "foo;Member[MS_apply_lambda];Argument[0].ReturnValue;ReturnValue;value", + "foo;Member[MS_reversed];Argument[0].ListElement;ReturnValue.ListElement;value", + "foo;Member[MS_reversed];Argument[0];ReturnValue;taint", + "foo;Member[MS_list_map];Argument[1].ListElement;Argument[0].Parameter[0];value", + "foo;Member[MS_list_map];Argument[0].ReturnValue;ReturnValue.ListElement;value", + "foo;Member[MS_list_map];Argument[1];ReturnValue;taint", + "foo;Member[MS_append_to_list];Argument[0].ListElement;ReturnValue.ListElement;value", + "foo;Member[MS_append_to_list];Argument[1];ReturnValue.ListElement;value", + "foo;Member[MS_append_to_list];Argument[0];ReturnValue;taint", + "foo;Member[MS_append_to_list];Argument[1];ReturnValue;taint", + "json;Member[MS_loads];Argument[0];ReturnValue;taint" + ] + } +} diff --git a/python/ql/test/experimental/dataflow/model-summaries/dataflow/TestSummaries.qll b/python/ql/test/experimental/dataflow/model-summaries/dataflow/TestSummaries.qll deleted file mode 100644 index d97702eec419..000000000000 --- a/python/ql/test/experimental/dataflow/model-summaries/dataflow/TestSummaries.qll +++ /dev/null @@ -1,20 +0,0 @@ -private import python -private import semmle.python.dataflow.new.FlowSummary -private import semmle.python.frameworks.data.ModelsAsData -private import semmle.python.ApiGraphs - -private class StepsFromModel extends ModelInput::SummaryModelCsv { - override predicate row(string row) { - row = - [ - "Foo;Member[MS_identity];Argument[0];ReturnValue;value", - "Foo;Member[MS_apply_lambda];Argument[1];Argument[0].Parameter[0];value", - "Foo;Member[MS_apply_lambda];Argument[0].ReturnValue;ReturnValue;value", - "Foo;Member[MS_reversed];Argument[0].ListElement;ReturnValue.ListElement;value", - "Foo;Member[MS_list_map];Argument[1].ListElement;Argument[0].Parameter[0];value", - "Foo;Member[MS_list_map];Argument[0].ReturnValue;ReturnValue.ListElement;value", - "Foo;Member[MS_append_to_list];Argument[0].ListElement;ReturnValue.ListElement;value", - "Foo;Member[MS_append_to_list];Argument[1];ReturnValue.ListElement;value" - ] - } -} diff --git a/python/ql/test/experimental/dataflow/model-summaries/dataflow/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/dataflow/model_summaries.py deleted file mode 100644 index c81fddf44ece..000000000000 --- a/python/ql/test/experimental/dataflow/model-summaries/dataflow/model_summaries.py +++ /dev/null @@ -1,69 +0,0 @@ - -import sys -import os - -sys.path.append(os.path.dirname(os.path.dirname((__file__)))) -from testlib import expects - -# These are defined so that we can evaluate the test code. -NONSOURCE = "not a source" -SOURCE = "source" - - -def is_source(x): - return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j - - -def SINK(x): - if is_source(x): - print("OK") - else: - print("Unexpected flow", x) - - -def SINK_F(x): - if is_source(x): - print("Unexpected flow", x) - else: - print("OK") - - -from Foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list - -# Simple summary -tainted = MS_identity(SOURCE) -SINK(tainted) # $ flow="SOURCE, l:-1 -> tainted" - -# Lambda summary -tainted_lambda = MS_apply_lambda(lambda x: [x], SOURCE) -SINK(tainted_lambda[0]) # $ flow="SOURCE, l:-1 -> tainted_lambda[0]" - -# A lambda that breaks the flow -untainted_lambda = MS_apply_lambda(lambda x: 1, SOURCE) -SINK_F(untainted_lambda) - -# Collection summaries -tainted_list = MS_reversed([SOURCE]) -SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" - -# Complex summaries -def box(x): - return [x] - -tainted_mapped = MS_list_map(box, [SOURCE]) -SINK(tainted_mapped[0][0]) # $ flow="SOURCE, l:-1 -> tainted_mapped[0][0]" - -def explicit_identity(x): - return x - -tainted_mapped_explicit = MS_list_map(explicit_identity, [SOURCE]) -SINK(tainted_mapped_explicit[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" - -tainted_mapped_summary = MS_list_map(MS_identity, [SOURCE]) -SINK(tainted_mapped_summary[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" - -tainted_list = MS_append_to_list([], SOURCE) -SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" - -tainted_list = MS_append_to_list([SOURCE], NONSOURCE) -SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py new file mode 100644 index 000000000000..3a98e04ceb77 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py @@ -0,0 +1,128 @@ + +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname((__file__)))) +from testlib import expects + +# These are defined so that we can evaluate the test code. +NONSOURCE = "not a source" +SOURCE = "source" + + +def is_source(x): + return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j + + +def SINK(x): + if is_source(x): + print("OK") + else: + print("Unexpected flow", x) + + +def SINK_F(x): + if is_source(x): + print("Unexpected flow", x) + else: + print("OK") + +ensure_tainted = ensure_not_tainted = print +TAINTED_STRING = "TAINTED_STRING" + +from foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list + +# Simple summary +via_identity = MS_identity(SOURCE) +SINK(via_identity) # $ flow="SOURCE, l:-1 -> via_identity" + +tainted = MS_identity(TAINTED_STRING) +ensure_tainted(tainted) # $ tainted + + +# Lambda summary +via_lambda = MS_apply_lambda(lambda x: [x], SOURCE) +SINK(via_lambda[0]) # $ flow="SOURCE, l:-1 -> via_lambda[0]" + +tainted_lambda = MS_apply_lambda(lambda x: [x], TAINTED_STRING) +ensure_tainted(tainted_lambda) # $ tainted + + +# A lambda that breaks the flow +not_via_lambda = MS_apply_lambda(lambda x: 1, SOURCE) +SINK_F(not_via_lambda) + +untainted_lambda = MS_apply_lambda(lambda x: 1, TAINTED_STRING) +ensure_not_tainted(untainted_lambda) + +# Collection summaries +via_reversed = MS_reversed([SOURCE]) +SINK(via_reversed[0]) # $ flow="SOURCE, l:-1 -> via_reversed[0]" + +tainted_list = MS_reversed([TAINTED_STRING]) +ensure_tainted(tainted_list[0]) # $ tainted + +# Complex summaries +def box(x): + return [x] + +via_map = MS_list_map(box, [SOURCE]) +SINK(via_map[0][0]) # $ flow="SOURCE, l:-1 -> via_map[0][0]" + +tainted_mapped = MS_list_map(box, [TAINTED_STRING]) +ensure_tainted(tainted_mapped[0][0]) # $ tainted + +def explicit_identity(x): + return x + +via_map_explicit = MS_list_map(explicit_identity, [SOURCE]) +SINK(via_map_explicit[0]) # $ flow="SOURCE, l:-1 -> via_map_explicit[0]" + +tainted_mapped_explicit = MS_list_map(explicit_identity, [TAINTED_STRING]) +tainted_mapped_explicit_implicit = MS_list_map(explicit_identity, TAINTED_LIST) +ensure_tainted( + tainted_mapped_explicit, # $ tainted + tainted_mapped_explicit[0], # $ tainted + tainted_mapped_explicit_implicit, # $ tainted + tainted_mapped_explicit_implicit[0] # $ tainted + ) + +via_map_summary = MS_list_map(MS_identity, [SOURCE]) +SINK(via_map_summary[0]) # $ flow="SOURCE, l:-1 -> via_map_summary[0]" + +tainted_mapped_summary = MS_list_map(MS_identity, [TAINTED_STRING]) +tainted_mapped_summary_implicit = MS_list_map(MS_identity, TAINTED_LIST) +ensure_tainted( + tainted_mapped_summary, # $ tainted + tainted_mapped_summary[0], # $ tainted + tainted_mapped_summary_implicit, # $ tainted + tainted_mapped_summary_implicit[0] # $ tainted + ) + +via_append_el = MS_append_to_list([], SOURCE) +SINK(via_append_el[0]) # $ flow="SOURCE, l:-1 -> via_append_el[0]" + +tainted_list_el = MS_append_to_list([], TAINTED_STRING) +ensure_tainted( + tainted_list_el, # $ tainted + tainted_list_el[0] # $ tainted + ) + +via_append = MS_append_to_list([SOURCE], NONSOURCE) +SINK(via_append[0]) # $ flow="SOURCE, l:-1 -> via_append[0]" + +tainted_list = MS_append_to_list([TAINTED_STRING], NONSOURCE) +tainted_list_implicit = MS_append_to_list(TAINTED_LIST, NONSOURCE) +ensure_tainted( + tainted_list, # $ tainted + tainted_list[0], # $ tainted + tainted_list_implicit, # $ tainted + tainted_list_implicit[0] # $ tainted + ) + +from json import MS_loads as json_loads +tainted_resultlist = json_loads(TAINTED_STRING) +ensure_tainted( + tainted_resultlist, # $ tainted + tainted_resultlist[0] # $ tainted + ) diff --git a/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.expected b/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.expected deleted file mode 100644 index 3875da4e143c..000000000000 --- a/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.expected +++ /dev/null @@ -1,2 +0,0 @@ -missingAnnotationOnSink -failures diff --git a/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.ql b/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.ql deleted file mode 100644 index afb44b6b2edb..000000000000 --- a/python/ql/test/experimental/dataflow/model-summaries/taint/NormalTaintTrackingTest.ql +++ /dev/null @@ -1,3 +0,0 @@ -import python -private import TestSummaries -import experimental.dataflow.TestUtil.NormalTaintTrackingTest diff --git a/python/ql/test/experimental/dataflow/model-summaries/taint/TestSummaries.qll b/python/ql/test/experimental/dataflow/model-summaries/taint/TestSummaries.qll deleted file mode 100644 index 57861d3c0224..000000000000 --- a/python/ql/test/experimental/dataflow/model-summaries/taint/TestSummaries.qll +++ /dev/null @@ -1,19 +0,0 @@ -private import python -private import semmle.python.dataflow.new.FlowSummary -private import semmle.python.frameworks.data.ModelsAsData -private import semmle.python.ApiGraphs - -private class StepsFromModel extends ModelInput::SummaryModelCsv { - override predicate row(string row) { - row = - [ - "Foo;Member[MS_identity];Argument[0];ReturnValue;value", - "Foo;Member[MS_apply_lambda];Argument[1];Argument[0].Parameter[0];value", - "Foo;Member[MS_apply_lambda];Argument[0].ReturnValue;ReturnValue;value", - "Foo;Member[MS_reversed];Argument[0];ReturnValue;taint", - "Foo;Member[MS_list_map];Argument[1];ReturnValue;taint", - "Foo;Member[MS_append_to_list];Argument[0];ReturnValue;taint", - "json;Member[MS_loads];Argument[0];ReturnValue;taint" - ] - } -} diff --git a/python/ql/test/experimental/dataflow/model-summaries/taint/model_summaries_taint.py b/python/ql/test/experimental/dataflow/model-summaries/taint/model_summaries_taint.py deleted file mode 100644 index 44ca79c116e0..000000000000 --- a/python/ql/test/experimental/dataflow/model-summaries/taint/model_summaries_taint.py +++ /dev/null @@ -1,70 +0,0 @@ - -import sys -import os - -sys.path.append(os.path.dirname(os.path.dirname((__file__)))) -from testlib import expects - -# These are defined so that we can evaluate the test code. -NONSOURCE = "not a source" -SOURCE = "source" - - -def is_source(x): - return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j - - -def SINK(x): - if is_source(x): - print("OK") - else: - print("Unexpected flow", x) - - -def SINK_F(x): - if is_source(x): - print("Unexpected flow", x) - else: - print("OK") - - -from Foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list - -# Simple summary -tainted = MS_identity(SOURCE) -SINK(tainted) # $ flow="SOURCE, l:-1 -> tainted" - -# Lambda summary -tainted_lambda = MS_apply_lambda(lambda x: x + 1, SOURCE) -SINK(tainted_lambda) # $ flow="SOURCE, l:-1 -> tainted_lambda" - -# A lambda that breaks the flow -untainted_lambda = MS_apply_lambda(lambda x: 1, SOURCE) -SINK_F(untainted_lambda) - -# Collection summaries -tainted_list = MS_reversed([SOURCE]) -SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" - -# Complex summaries -def add_colon(x): - return x + ":" - -tainted_mapped = MS_list_map(add_colon, [SOURCE]) -SINK(tainted_mapped[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped[0]" - -def explicit_identity(x): - return x - -tainted_mapped_explicit = MS_list_map(explicit_identity, [SOURCE]) -SINK(tainted_mapped_explicit[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" - -tainted_mapped_summary = MS_list_map(MS_identity, [SOURCE]) -SINK(tainted_mapped_summary[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" - -tainted_list = MS_append_to_list([SOURCE], NONSOURCE) -SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" - -from json import MS_loads as json_loads -tainted_resultlist = json_loads(SOURCE) -SINK(tainted_resultlist[0]) # $ flow="SOURCE, l:-1 -> tainted_resultlist[0]" From 0f8ebd151986ae66d4e113d8b8757a749fd2dc2f Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 22 Jun 2023 11:31:21 +0200 Subject: [PATCH 09/14] Update python/ql/test/experimental/dataflow/model-summaries/model_summaries.py Co-authored-by: Rasmus Wriedt Larsen --- .../dataflow/model-summaries/model_summaries.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py index 3a98e04ceb77..1a74df113667 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py +++ b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py @@ -120,7 +120,14 @@ def explicit_identity(x): tainted_list_implicit[0] # $ tainted ) +# Modeled flow-summary is not value preserving from json import MS_loads as json_loads + +# so no data-flow +SINK_F(json_loads(SOURCE)) +SINK_F(json_loads(SOURCE)[0]) + +# but has taint-flow tainted_resultlist = json_loads(TAINTED_STRING) ensure_tainted( tainted_resultlist, # $ tainted From 2264b119a67ffb150f48730816780d3e0259425e Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 22 Jun 2023 11:52:25 +0200 Subject: [PATCH 10/14] python: more consistent tests - do not test taint flow whne dataflow is established - test taint of both the collection and the expected element --- .../model-summaries/model_summaries.py | 41 +++++++------------ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py index 1a74df113667..5074814f3414 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py +++ b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py @@ -36,18 +36,10 @@ def SINK_F(x): via_identity = MS_identity(SOURCE) SINK(via_identity) # $ flow="SOURCE, l:-1 -> via_identity" -tainted = MS_identity(TAINTED_STRING) -ensure_tainted(tainted) # $ tainted - - # Lambda summary via_lambda = MS_apply_lambda(lambda x: [x], SOURCE) SINK(via_lambda[0]) # $ flow="SOURCE, l:-1 -> via_lambda[0]" -tainted_lambda = MS_apply_lambda(lambda x: [x], TAINTED_STRING) -ensure_tainted(tainted_lambda) # $ tainted - - # A lambda that breaks the flow not_via_lambda = MS_apply_lambda(lambda x: 1, SOURCE) SINK_F(not_via_lambda) @@ -59,8 +51,11 @@ def SINK_F(x): via_reversed = MS_reversed([SOURCE]) SINK(via_reversed[0]) # $ flow="SOURCE, l:-1 -> via_reversed[0]" -tainted_list = MS_reversed([TAINTED_STRING]) -ensure_tainted(tainted_list[0]) # $ tainted +tainted_list = MS_reversed(TAINTED_LIST) +ensure_tainted( + tainted_list, # $ tainted + tainted_list[0] # $ tainted + ) # Complex summaries def box(x): @@ -69,8 +64,11 @@ def box(x): via_map = MS_list_map(box, [SOURCE]) SINK(via_map[0][0]) # $ flow="SOURCE, l:-1 -> via_map[0][0]" -tainted_mapped = MS_list_map(box, [TAINTED_STRING]) -ensure_tainted(tainted_mapped[0][0]) # $ tainted +tainted_mapped = MS_list_map(box, TAINTED_LIST) +ensure_tainted( + tainted_mapped, # $ tainted + tainted_mapped[0][0] # $ tainted + ) def explicit_identity(x): return x @@ -78,25 +76,19 @@ def explicit_identity(x): via_map_explicit = MS_list_map(explicit_identity, [SOURCE]) SINK(via_map_explicit[0]) # $ flow="SOURCE, l:-1 -> via_map_explicit[0]" -tainted_mapped_explicit = MS_list_map(explicit_identity, [TAINTED_STRING]) -tainted_mapped_explicit_implicit = MS_list_map(explicit_identity, TAINTED_LIST) +tainted_mapped_explicit = MS_list_map(explicit_identity, TAINTED_LIST) ensure_tainted( tainted_mapped_explicit, # $ tainted - tainted_mapped_explicit[0], # $ tainted - tainted_mapped_explicit_implicit, # $ tainted - tainted_mapped_explicit_implicit[0] # $ tainted + tainted_mapped_explicit[0] # $ tainted ) via_map_summary = MS_list_map(MS_identity, [SOURCE]) SINK(via_map_summary[0]) # $ flow="SOURCE, l:-1 -> via_map_summary[0]" -tainted_mapped_summary = MS_list_map(MS_identity, [TAINTED_STRING]) -tainted_mapped_summary_implicit = MS_list_map(MS_identity, TAINTED_LIST) +tainted_mapped_summary = MS_list_map(MS_identity, TAINTED_LIST) ensure_tainted( tainted_mapped_summary, # $ tainted - tainted_mapped_summary[0], # $ tainted - tainted_mapped_summary_implicit, # $ tainted - tainted_mapped_summary_implicit[0] # $ tainted + tainted_mapped_summary[0] # $ tainted ) via_append_el = MS_append_to_list([], SOURCE) @@ -111,13 +103,10 @@ def explicit_identity(x): via_append = MS_append_to_list([SOURCE], NONSOURCE) SINK(via_append[0]) # $ flow="SOURCE, l:-1 -> via_append[0]" -tainted_list = MS_append_to_list([TAINTED_STRING], NONSOURCE) tainted_list_implicit = MS_append_to_list(TAINTED_LIST, NONSOURCE) ensure_tainted( tainted_list, # $ tainted - tainted_list[0], # $ tainted - tainted_list_implicit, # $ tainted - tainted_list_implicit[0] # $ tainted + tainted_list[0] # $ tainted ) # Modeled flow-summary is not value preserving From 86dfc7b66e87b1877ddeb653195d59478685997a Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 23 Jun 2023 08:18:06 +0200 Subject: [PATCH 11/14] python: format --- .../model-summaries/model_summaries.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py index 5074814f3414..64909fcd7411 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py +++ b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py @@ -54,8 +54,8 @@ def SINK_F(x): tainted_list = MS_reversed(TAINTED_LIST) ensure_tainted( tainted_list, # $ tainted - tainted_list[0] # $ tainted - ) + tainted_list[0], # $ tainted +) # Complex summaries def box(x): @@ -67,8 +67,8 @@ def box(x): tainted_mapped = MS_list_map(box, TAINTED_LIST) ensure_tainted( tainted_mapped, # $ tainted - tainted_mapped[0][0] # $ tainted - ) + tainted_mapped[0][0], # $ tainted +) def explicit_identity(x): return x @@ -79,8 +79,8 @@ def explicit_identity(x): tainted_mapped_explicit = MS_list_map(explicit_identity, TAINTED_LIST) ensure_tainted( tainted_mapped_explicit, # $ tainted - tainted_mapped_explicit[0] # $ tainted - ) + tainted_mapped_explicit[0], # $ tainted +) via_map_summary = MS_list_map(MS_identity, [SOURCE]) SINK(via_map_summary[0]) # $ flow="SOURCE, l:-1 -> via_map_summary[0]" @@ -88,8 +88,8 @@ def explicit_identity(x): tainted_mapped_summary = MS_list_map(MS_identity, TAINTED_LIST) ensure_tainted( tainted_mapped_summary, # $ tainted - tainted_mapped_summary[0] # $ tainted - ) + tainted_mapped_summary[0], # $ tainted +) via_append_el = MS_append_to_list([], SOURCE) SINK(via_append_el[0]) # $ flow="SOURCE, l:-1 -> via_append_el[0]" @@ -97,8 +97,8 @@ def explicit_identity(x): tainted_list_el = MS_append_to_list([], TAINTED_STRING) ensure_tainted( tainted_list_el, # $ tainted - tainted_list_el[0] # $ tainted - ) + tainted_list_el[0], # $ tainted +) via_append = MS_append_to_list([SOURCE], NONSOURCE) SINK(via_append[0]) # $ flow="SOURCE, l:-1 -> via_append[0]" @@ -106,8 +106,8 @@ def explicit_identity(x): tainted_list_implicit = MS_append_to_list(TAINTED_LIST, NONSOURCE) ensure_tainted( tainted_list, # $ tainted - tainted_list[0] # $ tainted - ) + tainted_list[0], # $ tainted +) # Modeled flow-summary is not value preserving from json import MS_loads as json_loads @@ -120,5 +120,5 @@ def explicit_identity(x): tainted_resultlist = json_loads(TAINTED_STRING) ensure_tainted( tainted_resultlist, # $ tainted - tainted_resultlist[0] # $ tainted - ) + tainted_resultlist[0], # $ tainted +) From 26856a82a6320477d68a8002f3f9dd16982df935 Mon Sep 17 00:00:00 2001 From: yoff Date: Fri, 23 Jun 2023 10:15:20 +0200 Subject: [PATCH 12/14] Apply suggestions from code review Co-authored-by: Asger F --- .../javascript/frameworks/data/internal/ApiGraphModels.qll | 2 +- .../semmle/python/frameworks/data/internal/ApiGraphModels.qll | 2 +- .../lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index 6688ba36cd09..b33b9e8e2388 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -644,7 +644,7 @@ module ModelOutput { } /** - * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row. + * Holds if a `baseNode` is a callable identified by the `type,path` part of a summary row. */ cached predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 6688ba36cd09..b33b9e8e2388 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -644,7 +644,7 @@ module ModelOutput { } /** - * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row. + * Holds if a `baseNode` is a callable identified by the `type,path` part of a summary row. */ cached predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll index 6688ba36cd09..b33b9e8e2388 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll @@ -644,7 +644,7 @@ module ModelOutput { } /** - * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row. + * Holds if a `baseNode` is a callable identified by the `type,path` part of a summary row. */ cached predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { From 6cb03190fa26adb4dc73e0026facc0556937cbf5 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 26 Jun 2023 11:43:51 +0200 Subject: [PATCH 13/14] Python: Updates from inline test being parameterized --- .../dataflow/model-summaries/InlineTaintTest.expected | 3 ++- .../experimental/dataflow/model-summaries/InlineTaintTest.ql | 1 + .../dataflow/model-summaries/NormalDataflowTest.expected | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected index 79d760d87f42..4a72c551661a 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected +++ b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected @@ -1,3 +1,4 @@ +failures argumentToEnsureNotTaintedNotMarkedAsSpurious untaintedArgumentToEnsureTaintedNotMarkedAsMissing -failures +testFailures diff --git a/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql index f7f84cb84798..551266d74556 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql +++ b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql @@ -1,3 +1,4 @@ import python private import TestSummaries import experimental.meta.InlineTaintTest +import MakeInlineTaintTest diff --git a/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected index 3875da4e143c..04431311999c 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected +++ b/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected @@ -1,2 +1,3 @@ missingAnnotationOnSink failures +testFailures From 257f9912dd6eb292860fb38874396edc2d57d361 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 26 Jun 2023 12:00:15 +0200 Subject: [PATCH 14/14] Python: Remove one more unnecessary taint test --- .../experimental/dataflow/model-summaries/model_summaries.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py index 64909fcd7411..ee02918b0798 100644 --- a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py +++ b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py @@ -44,8 +44,6 @@ def SINK_F(x): not_via_lambda = MS_apply_lambda(lambda x: 1, SOURCE) SINK_F(not_via_lambda) -untainted_lambda = MS_apply_lambda(lambda x: 1, TAINTED_STRING) -ensure_not_tainted(untainted_lambda) # Collection summaries via_reversed = MS_reversed([SOURCE])