From 91f23567424a3e4036b9edeefb6a7ebc53a4c5a6 Mon Sep 17 00:00:00 2001 From: Fabian Yamaguchi Date: Fri, 1 Jan 2021 16:35:01 +0100 Subject: [PATCH] First version of `joern-scan` (#13) * Boilerplate for `JoernScan` * Push reflection voodoo for safe keeping. * `QueryDatabase` done * Collapse hierarchy * Add `name` field to queries * Simplify package layout * More simplification * Fix build * Remove schema extension because it's slow and not needed here * QueryDb works with EngineContext now * Fix build, add test * Add `joern-scan` script * Update README.md * Tuning of queries and shell script * Factor out joern-specific code from `QueryDatabase` * Cleanup * Improve README.md --- .github/workflows/pr.yml | 2 +- README.md | 154 ++++-------------- build.sbt | 6 +- install.sh | 9 +- joern-scan | 15 ++ project/Versions.scala | 2 +- schema/build.sbt | 55 ------- schema/src/main/resources/schema/ext.json | 1 - .../io/joern/scanners/QueryDatabase.scala | 142 ++++++++++++++++ src/main/scala/io/joern/scanners/Scan.scala | 61 +++++++ .../scanners/c/{vulnscan => }/CopyLoops.scala | 8 +- .../c/{vulnscan => }/HeapBasedOverflow.scala | 10 +- .../joern/scanners/c/InsecureFunctions.scala | 22 +++ .../c/{vulnscan => }/IntegerTruncations.scala | 9 +- .../c/{codequality => }/Metrics.scala | 30 +++- .../c/codequality/CodeQualityScanner.scala | 54 ------ .../scanners/c/vulnscan/CVulnScanner.scala | 47 ------ .../c/vulnscan/InsecureFunctions.scala | 20 --- .../scanners/c/vulnscan/SampleQuerySet.scala | 31 ---- .../scanners/language/ScannerStarters.scala | 12 -- .../scanners/{language => }/package.scala | 23 ++- .../c/{vulnscan => }/CopyLoopTests.scala | 4 +- .../HeapBasedOverflowTests.scala | 3 +- .../InsecureFunctionsTests.scala | 4 +- .../IntegerTruncationsTests.scala | 4 +- .../c/{codequality => }/MetricsTests.scala | 3 +- .../joern/scanners/c/QueryDatabaseTests.scala | 28 ++++ .../scala/io/joern/scanners/c/ScanTests.scala | 15 ++ .../c/vulnscan/SampleQuerySetTests.scala | 16 -- 29 
files changed, 376 insertions(+), 414 deletions(-) create mode 100755 joern-scan delete mode 100644 schema/build.sbt delete mode 100644 schema/src/main/resources/schema/ext.json create mode 100644 src/main/scala/io/joern/scanners/QueryDatabase.scala create mode 100644 src/main/scala/io/joern/scanners/Scan.scala rename src/main/scala/io/joern/scanners/c/{vulnscan => }/CopyLoops.scala (89%) rename src/main/scala/io/joern/scanners/c/{vulnscan => }/HeapBasedOverflow.scala (87%) create mode 100644 src/main/scala/io/joern/scanners/c/InsecureFunctions.scala rename src/main/scala/io/joern/scanners/c/{vulnscan => }/IntegerTruncations.scala (72%) rename src/main/scala/io/joern/scanners/c/{codequality => }/Metrics.scala (69%) delete mode 100644 src/main/scala/io/joern/scanners/c/codequality/CodeQualityScanner.scala delete mode 100644 src/main/scala/io/joern/scanners/c/vulnscan/CVulnScanner.scala delete mode 100644 src/main/scala/io/joern/scanners/c/vulnscan/InsecureFunctions.scala delete mode 100644 src/main/scala/io/joern/scanners/c/vulnscan/SampleQuerySet.scala delete mode 100644 src/main/scala/io/joern/scanners/language/ScannerStarters.scala rename src/main/scala/io/joern/scanners/{language => }/package.scala (84%) rename src/test/scala/io/joern/scanners/c/{vulnscan => }/CopyLoopTests.scala (88%) rename src/test/scala/io/joern/scanners/c/{vulnscan => }/HeapBasedOverflowTests.scala (91%) rename src/test/scala/io/joern/scanners/c/{vulnscan => }/InsecureFunctionsTests.scala (87%) rename src/test/scala/io/joern/scanners/c/{vulnscan => }/IntegerTruncationsTests.scala (91%) rename src/test/scala/io/joern/scanners/c/{codequality => }/MetricsTests.scala (97%) create mode 100644 src/test/scala/io/joern/scanners/c/QueryDatabaseTests.scala create mode 100644 src/test/scala/io/joern/scanners/c/ScanTests.scala delete mode 100644 src/test/scala/io/joern/scanners/c/vulnscan/SampleQuerySetTests.scala diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index f8ee9b9..fbaef65 
100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -15,4 +15,4 @@ jobs: ./install.sh mkdir /tmp/foo echo "int foo(int a, int b, int c, int d, int e, int f) {}" > /tmp/foo/foo.c - ./joern --src /tmp/foo --run codequalityscanner + ./joern --src /tmp/foo --run scan diff --git a/README.md b/README.md index bc5c177..df6e94f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Joern Query Database +# Joern Query Database ("Joern-Scan") This is the central query database for the open-source code analysis platform [Joern](https://github.com/ShiftLeftSecurity/joern). It has @@ -9,9 +9,7 @@ two purposes: The query database is distributed as a standalone library that includes Joern as a dependency. This means that it is not necessary to -install Joern to make use of the scanners in the database. Instead, -scanners can be invoked from any JVM-based program - as the automatic -tests included in the database demonstrate. +install Joern to make use of the queries in the database. At the same time, the database is a Joern extension, that is, when dynamically loaded at startup, its functionality becomes available on @@ -23,22 +21,17 @@ for inclusion in the default distribution. ## Installing and running -The installation scripts downloads joern and installs it in a sub-directory. +The installation script downloads joern and installs it in a sub-directory. The query database is installed as an extension. ``` ./install.sh ``` -The query database currently makes available the following scanners: - -* codequalityscanner - a code quality scanner for C code -* cvulnscanner - a vulnerability scanner for C code - -You can run scanners as follows: +You can run all queries as follows: ``` -./joern --src path/to/code --run --param k1=v1,... 
+./joern-scan path/to/code ``` For example, @@ -46,24 +39,33 @@ For example, ``` mkdir foo echo "int foo(int a, int b, int c, int d, int e, int f) {}" > foo/foo.c -./joern --src foo --run codequalityscanner +./joern-scan foo ``` -runs the code quality scanner and determines that the function `foo` has too many parameters. +runs all queries on the sample code in the directory `foo`, determining that the function `foo` +has too many parameters. + +## Adding your own queries -## Database overview +Please follow the rules below for a tear-free query writing experience: -Each scanner is hosted in a sub package of `io.joern.scanners`, that -is, it is located in a directory in -`src/main/scala/io/joern/scanners`. As an example, let us look into -the `CodeQualityScanner` at `src/main/scala/io/joern/scanners`. The -file `Metrics.scala` contains its queries: +* Queries in the package `io.joern.scanners` are picked up automatically at runtime, + so please put your queries there. +* Each query must begin with the annotation `@q` and must be placed in a query bundle. + A query bundle is simply an `object` that derives from `QueryBundle`. +* Queries can have parameters, but you must provide a default value for each parameter. +* Please add unit tests for queries. These also serve as a spec for what your query does.
+* Please format the code before sending a PR using `sbt scalafmt` and `sbt test:scalafmt` +Take a look at the query bundle `Metrics` at `src/main/scala/io/joern/scanners/c/Metrics.scala` +as an example: ``` -object Metrics { +object Metrics extends QueryBundle { + @q def tooManyParameters(n: Int = 4): Query = Query( + name = "too-many-params", title = s"Number of parameters larger than $n", description = s"This query identifies functions with more than $n formal parameters", @@ -72,6 +74,7 @@ object Metrics { } ) + @q def tooHighComplexity(n: Int = 4): Query = Query( title = s"Cyclomatic complexity higher than $n", description = @@ -84,38 +87,10 @@ object Metrics { } ``` -As you can see, each query is implemented in a function that receives -a code property graph (type `Cpg`) and returns a list of findings -(type `List[nodes.NewFinding]`). - -These queries are invoked in sequence in `CodeQualityPass` in the file -`CodeQualityScanner.scala`: - -``` -... -class CodeQualityPass(cpg: Cpg) extends CpgPass(cpg) { - import Metrics._ - /** - * All we do here is call all queries and add a node to - * the graph for each result. - * */ - override def run(): Iterator[DiffGraph] = { - val diffGraph = DiffGraph.newBuilder - (tooManyParameters()(cpg) ++ tooManyLoops()(cpg) ++ tooNested()(cpg) ++ - tooLong()(cpg) ++ tooHighComplexity()(cpg) ++ multipleReturns()(cpg)) - .foreach(diffGraph.addNode) - Iterator(diffGraph.build) - } -} -... -``` -Apart from these query invocations, `CodeQualityScanner.scala` merely -contains boilerplate code that turns the scanner into a Joern extension. - Corresponding tests for queries are located in `src/test/scala/io/joern/scanners`.
For example, tests for the metrics queries are located in -`src/test/scala/io/joern/scanners/c/codequality/MetricsTests.scala`: +`src/test/scala/io/joern/scanners/c/MetricsTests.scala`: ``` class MetricsTests extends Suite { @@ -153,83 +128,10 @@ follows: sbt test ``` -Automatic code formatting can be performed as follows: +You can also test newly developed queries end to end. -``` -sbt scalafmt -sbt test:scalafmt -``` - -## Adding queries to existing scripts - -You can add queries to an existing bundles by creating a new query set -in the script package. For example, query sets for the C scanner can -be placed here: - -https://github.com/joernio/batteries/blob/main/src/main/scala/io/joern/batteries/c/vulnscan/ - -The file [`SampleQuerySet.scala`](https://github.com/joernio/batteries/blob/main/src/main/scala/io/joern/batteries/c/vulnscan/SampleQuerySet.scala) serves as a template. +To test newly created queries with `joern-scan`, reinstall the extension and run the scanner as follows: ``` -object SampleQuerySet { - - def myQuery1(cpg: Cpg): List[nodes.NewFinding] = { - // Add your query here - } - - def myQuery2(cpg: Cpg): List[nodes.NewFinding] = { - // Add another query here - } - // ...
-} - -class SampleQuertSet(cpg: Cpg) extends CpgPass(cpg) { - import SampleQuerySet._ - - override def run(): Iterator[DiffGraph] = { - val diffGraph = DiffGraph.newBuilder - // Execute queries - myQuery1(cpg).foreach(diffGraph.addNode) - myQuery2(cpg).foreach(diffGraph.addNode) - - Iterator(diffGraph.build) - } -``` - -Finally, add -a `runPass` line to the script [here](https://github.com/joernio/batteries/blob/main/src/main/scala/io/joern/batteries/c/vulnscan/CScanner.scala#L23): - -``` -class CScanner(options: CScannerOptions) extends LayerCreator { - override val overlayName: String = CScanner.overlayName - override val description: String = CScanner.description - - override def create(context: LayerCreatorContext, - storeUndoInfo: Boolean): Unit = { - runPass(new IntegerTruncations(context.cpg), context, storeUndoInfo) - // add more `runPass` calls to execute query sets by default - } -``` - -## Adding Tests - -Please add tests for your queries to ensure that they continue functioning. -Tests also serve as a specification for what your queries should and should not do. - -A template for an automated query set test can be found [here](https://github.com/joernio/batteries/blob/main/src/test/scala/io/joern/batteries/c/vulnscan/SampleQuerySetTests.scala) - -``` -package io.joern.batteries.c.vulnscan - -class SampleQuerySetTests extends Suite { - - override val code: String = - """ - void place_your_code_here() {} - """ - - "find ..." 
in { - // test code goes here - } -} +./install.sh && ./joern-scan path/to/code ``` diff --git a/build.sbt b/build.sbt index 35e4932..1e7c44b 100644 --- a/build.sbt +++ b/build.sbt @@ -5,8 +5,6 @@ ThisBuild/scalaVersion := "2.13.0" enablePlugins(JavaAppPackaging) enablePlugins(GitVersioning) -lazy val schema = project.in(file("schema")) -dependsOn(schema) libraryDependencies ++= Seq( "com.lihaoyi" %% "upickle" % "1.2.2", "com.github.pathikrit" %% "better-files" % "3.8.0", @@ -19,7 +17,6 @@ libraryDependencies ++= Seq( "io.shiftleft" %% "fuzzyc2cpg" % Versions.cpg % Test, "org.scalatest" %% "scalatest" % "3.1.1" % Test ) -excludeDependencies += ExclusionRule("io.shiftleft", "codepropertygraph-domain-classes_2.13") // We exclude a few jars that the main joern distribution already includes Universal / mappings := (Universal / mappings).value.filterNot { @@ -31,6 +28,9 @@ Universal / mappings := (Universal / mappings).value.filterNot { path.contains("com.lihaoyi.u") } +sources in (Compile,doc) := Seq.empty +publishArtifact in (Compile, packageDoc) := false + lazy val createDistribution = taskKey[Unit]("Create binary distribution of extension") createDistribution := { (Universal/packageZipTarball).value diff --git a/install.sh b/install.sh index 05fcaf2..c273fa2 100755 --- a/install.sh +++ b/install.sh @@ -4,7 +4,7 @@ set -o pipefail set -o nounset set -eu -readonly JOERN_VERSION="v1.1.63" +readonly JOERN_VERSION="v1.1.64" if [ "$(uname)" = 'Darwin' ]; then # get script location @@ -71,10 +71,3 @@ pushd $SCRIPT_ABS_DIR ./joern --add-plugin ./querydb.zip rm lib popd - -echo "Adapting CPG schema" -cp ${SCHEMA_SRC_DIR}/*.json ${JOERN_INSTALL}/schema-extender/schemas/ -pushd $JOERN_INSTALL - ./schema-extender.sh -popd - diff --git a/joern-scan b/joern-scan new file mode 100755 index 0000000..4c2b0e4 --- /dev/null +++ b/joern-scan @@ -0,0 +1,15 @@ +#!/usr/bin/env sh + +if [ "$(uname -s)" = "Darwin" ]; then + SCRIPT_ABS_PATH=$(greadlink -f "$0") +else + SCRIPT_ABS_PATH=$(readlink -f "$0")
+fi +SCRIPT_ABS_DIR=$(dirname "$SCRIPT_ABS_PATH") + +if [ "$#" -lt 1 ]; then + echo "Pass in the source directory to scan" + exit 1 +fi + +$SCRIPT_ABS_DIR/joern --run scan --src "$@" diff --git a/project/Versions.scala b/project/Versions.scala index 38c3c02..caae334 100644 --- a/project/Versions.scala +++ b/project/Versions.scala @@ -1,5 +1,5 @@ /* Declare dependency versions in one place */ object Versions { - val cpg = "1.3.16" + val cpg = "1.3.25" val overflowdb = "1.24" } diff --git a/schema/build.sbt b/schema/build.sbt deleted file mode 100644 index c056367..0000000 --- a/schema/build.sbt +++ /dev/null @@ -1,55 +0,0 @@ -name := "schema" - -libraryDependencies ++= Seq( - "io.shiftleft" %% "codepropertygraph-schema" % Versions.cpg, - "io.shiftleft" %% "overflowdb-traversal" % Versions.overflowdb, -) - -import better.files.FileExtensions - -lazy val mergeSchemaTask = taskKey[File]("Merge schemas") -mergeSchemaTask := { - val outputRoot = new File(sourceManaged.in(Compile).value.getAbsolutePath) - val outputFile = outputRoot / "cpg.json" - val schemasDir = new File("schema/src/main/resources/schema") - val schemaFiles = schemasDir.listFiles.toSeq ++ extractOriginalSchema.value - val mergedSchema = overflowdb.codegen.SchemaMerger.mergeCollections(schemaFiles) - outputFile.mkdirs - mergedSchema.toScala.copyTo(outputFile.toScala, overwrite = true) - println(s"successfully merged schemas into $outputFile") - outputFile -} - -Compile / sourceGenerators += Def.task { - val mergedSchemaFile = mergeSchemaTask.value - val outputRoot = new File(sourceManaged.in(Compile).value.getAbsolutePath + "/io/shiftleft/codepropertygraph/generated") - - println(s"generating domain classes from $mergedSchemaFile") - val basePackage = "io.shiftleft.codepropertygraph.generated" - val outputDir = (Compile / sourceManaged).value - new overflowdb.codegen.CodeGen(mergedSchemaFile.getAbsolutePath, basePackage).run(outputDir) - - FileUtils.listFilesRecursively(outputRoot) -}.taskValue - -lazy 
val extractOriginalSchema = taskKey[Set[File]]("extract original cpg schema from dependency") -extractOriginalSchema := { - val artifactName = "codepropertygraph-schema_2.13" - val cpgSourceJar = updateClassifiers.value - .configurations - .filter(_.configuration.name == "compile") - .flatMap { config => - config.modules.filter(_.module.name == artifactName).flatMap { module => - module.artifacts.collect { case (artifact, file) if artifact.`type` == "src" => file } - } - } - .headOption - .getOrElse(throw new AssertionError(s"unable to find $artifactName from dependencies")) - - val tmpDir = java.nio.file.Files.createTempDirectory("joern-sample-ext") - - val schemaFiles = IO.unzip(cpgSourceJar, tmpDir.toFile, _.endsWith(".json")) - if (schemaFiles.isEmpty) throw new AssertionError(s"unable to find original schema files in $artifactName") - schemaFiles -} - diff --git a/schema/src/main/resources/schema/ext.json b/schema/src/main/resources/schema/ext.json deleted file mode 100644 index 0967ef4..0000000 --- a/schema/src/main/resources/schema/ext.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/src/main/scala/io/joern/scanners/QueryDatabase.scala b/src/main/scala/io/joern/scanners/QueryDatabase.scala new file mode 100644 index 0000000..fdb2a35 --- /dev/null +++ b/src/main/scala/io/joern/scanners/QueryDatabase.scala @@ -0,0 +1,142 @@ +package io.joern.scanners + +/* + * This file is to be moved to `console` in `codepropertygraph` + * */ + +import org.reflections8.Reflections +import org.reflections8.util.{ClasspathHelper, ConfigurationBuilder} +import org.slf4j.{Logger, LoggerFactory} + +import scala.annotation.StaticAnnotation +import scala.jdk.CollectionConverters._ +import scala.reflect.runtime.universe._ +import scala.reflect.runtime.{universe => ru} + +trait QueryBundle +class q() extends StaticAnnotation + +class QueryDatabase( + defaultArgumentProvider: DefaultArgumentProvider = + new DefaultArgumentProvider) { + + private val logger: Logger = 
LoggerFactory.getLogger(classOf[QueryDatabase]) + + private val runtimeMirror: ru.Mirror = + ru.runtimeMirror(getClass.getClassLoader) + + /** + * Determine all bundles on the class path + * */ + def allBundles: List[Class[_ <: QueryBundle]] = + new Reflections( + new ConfigurationBuilder().setUrls( + ClasspathHelper.forPackage("io.joern.scanners", + ClasspathHelper.contextClassLoader(), + ClasspathHelper.staticClassLoader())) + ).getSubTypesOf(classOf[QueryBundle]).asScala.toList + + /** + * Determine queries across all bundles + * */ + def allQueries: List[Query] = { + allBundles.flatMap { bundle => + queriesInBundle(bundle) + } + } + + /** + * Return all queries inside `bundle`. + * */ + def queriesInBundle[T <: QueryBundle](bundle: Class[T]): List[Query] = { + queryCreatorsInBundle(bundle).map { + case (method, args) => + method.apply(args: _*).asInstanceOf[Query] + } + } + + /** + * Obtain all (methodMirror, args) pairs from bundle, making it possible to override + * default args before creating the query. 
+ * */ + def queryCreatorsInBundle[T <: QueryBundle]( + bundle: Class[T]): List[(ru.MethodMirror, List[Any])] = { + methodsForBundle(bundle).map(m => (m, bundle)).flatMap { + case (method, bundle) => + val args = defaultArgs(method.symbol, classToType(bundle)) + if (args.isDefined) { + List((method, args.get)) + } else { + logger.warn(s"Cannot determine default arguments for query: $method") + List() + } + + } + } + + private def classToType[T](x: Class[T]) = { + runtimeMirror.classSymbol(x).toType + } + + private def methodsForBundle[T <: QueryBundle](bundle: Class[T]) = { + val bundleType = classToType(bundle) + val methods = bundleType.members + .collect { case m if m.isMethod => m.asMethod } + .filter { m => + m.annotations.map(_.tree.tpe.typeSymbol.name.toString).contains("q") + } + + val im = runtimeMirror.reflect( + runtimeMirror + .reflectModule(bundleType.typeSymbol.asClass.module.asModule) + .instance) + methods.map { m => + im.reflectMethod(m) + }.toList + } + + private def defaultArgs(method: MethodSymbol, + bundleType: Type): Option[List[Any]] = { + val runtimeMirror = ru.runtimeMirror(getClass.getClassLoader) + val im = runtimeMirror.reflect( + runtimeMirror + .reflectModule(bundleType.typeSymbol.asClass.module.asModule) + .instance) + val args = (for (ps <- method.paramLists; p <- ps) yield p).zipWithIndex + .map { + case (x, i) => defaultArgumentProvider.defaultArgument(method, im, x, i) + } + if (args.contains(None)) { + None + } else { + Some(args.map(_.get)) + } + } + +} + +/** + * Joern and Ocular require different implicits to be present, and when + * we encounter these implicits as parameters in a query that we invoke + * via reflection, we need to obtain these implicits from somewhere. + * + * We achieve this by implementing a `DefaultArgumentProvider` for Ocular, + * and one for Joern. 
+ * */ +class DefaultArgumentProvider { + + def defaultArgument(method: MethodSymbol, + im: InstanceMirror, + x: Symbol, + i: Int): Option[Any] = { + val typeSignature = im.symbol.typeSignature + val defaultMethodName = s"${method.name}$$default$$${i + 1}" + val m = typeSignature.member(TermName(defaultMethodName)) + if (m.isMethod) { + Some(im.reflectMethod(m.asMethod).apply()) + } else { + None + } + } + +} diff --git a/src/main/scala/io/joern/scanners/Scan.scala b/src/main/scala/io/joern/scanners/Scan.scala new file mode 100644 index 0000000..1188b4f --- /dev/null +++ b/src/main/scala/io/joern/scanners/Scan.scala @@ -0,0 +1,61 @@ +package io.joern.scanners + +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.dataflowengineoss.queryengine.EngineContext +import io.shiftleft.passes.{CpgPass, DiffGraph} +import io.shiftleft.semanticcpg.layers.{ + LayerCreator, + LayerCreatorContext, + LayerCreatorOptions +} + +import scala.reflect.runtime.universe._ + +object Scan { + val overlayName = "scan" + val description = "Joern/Ocular Code Scanner" + def defaultOpts = new ScanOptions() +} + +class ScanOptions() extends LayerCreatorOptions {} + +class Scan(options: ScanOptions)(implicit engineContext: EngineContext) + extends LayerCreator { + override val overlayName: String = Scan.overlayName + override val description: String = Scan.description + + override def create(context: LayerCreatorContext, + storeUndoInfo: Boolean): Unit = { + runPass(new ScanPass(context.cpg), context, storeUndoInfo) + outputFindings(context.cpg) + } +} + +class JoernDefaultArgumentProvider(implicit context: EngineContext) + extends DefaultArgumentProvider { + + override def defaultArgument(method: MethodSymbol, + im: InstanceMirror, + x: Symbol, + i: Int): Option[Any] = { + if (x.typeSignature.toString.endsWith("EngineContext")) { + Some(context) + } else { + super.defaultArgument(method, im, x, i) + } + } +} + +class ScanPass(cpg: Cpg)(implicit engineContext: EngineContext) + extends 
CpgPass(cpg) { + + override def run(): Iterator[DiffGraph] = { + val diffGraph = DiffGraph.newBuilder + val queryDb = new QueryDatabase(new JoernDefaultArgumentProvider()) + queryDb.allQueries.foreach { query => + query(cpg).foreach(diffGraph.addNode) + } + Iterator(diffGraph.build) + } + +} diff --git a/src/main/scala/io/joern/scanners/c/vulnscan/CopyLoops.scala b/src/main/scala/io/joern/scanners/c/CopyLoops.scala similarity index 89% rename from src/main/scala/io/joern/scanners/c/vulnscan/CopyLoops.scala rename to src/main/scala/io/joern/scanners/c/CopyLoops.scala index 61bcc59..d204638 100644 --- a/src/main/scala/io/joern/scanners/c/vulnscan/CopyLoops.scala +++ b/src/main/scala/io/joern/scanners/c/CopyLoops.scala @@ -1,11 +1,13 @@ -package io.joern.scanners.c.vulnscan +package io.joern.scanners.c -import io.joern.scanners.language._ +import io.joern.scanners._ import io.shiftleft.semanticcpg.language._ -object CopyLoops { +object CopyLoops extends QueryBundle { + @q def isCopyLoop(): Query = Query( + name = "copy-loop", title = "Copy loop detected", description = """ diff --git a/src/main/scala/io/joern/scanners/c/vulnscan/HeapBasedOverflow.scala b/src/main/scala/io/joern/scanners/c/HeapBasedOverflow.scala similarity index 87% rename from src/main/scala/io/joern/scanners/c/vulnscan/HeapBasedOverflow.scala rename to src/main/scala/io/joern/scanners/c/HeapBasedOverflow.scala index d7f3bd5..a390f8f 100644 --- a/src/main/scala/io/joern/scanners/c/vulnscan/HeapBasedOverflow.scala +++ b/src/main/scala/io/joern/scanners/c/HeapBasedOverflow.scala @@ -1,11 +1,11 @@ -package io.joern.scanners.c.vulnscan +package io.joern.scanners.c -import io.joern.scanners.language._ +import io.joern.scanners._ +import io.shiftleft.dataflowengineoss.queryengine.EngineContext import io.shiftleft.semanticcpg.language._ import io.shiftleft.dataflowengineoss.language._ -import io.shiftleft.dataflowengineoss.queryengine.EngineContext -object HeapBasedOverflow { +object HeapBasedOverflow 
extends QueryBundle { /** * Find calls to malloc where the first argument contains an arithmetic expression, @@ -14,7 +14,9 @@ object HeapBasedOverflow { * an adaption of the old-joern query first shown at 31C3 that found a * buffer overflow in VLC's MP4 demuxer (CVE-2014-9626). * */ + @q def mallocMemcpyIntOverflow()(implicit context: EngineContext): Query = Query( + name = "malloc-memcpy-int-overflow", title = "Dangerous copy-operation into heap-allocated buffer", description = "-", score = 4, { cpg => diff --git a/src/main/scala/io/joern/scanners/c/InsecureFunctions.scala b/src/main/scala/io/joern/scanners/c/InsecureFunctions.scala new file mode 100644 index 0000000..974aadb --- /dev/null +++ b/src/main/scala/io/joern/scanners/c/InsecureFunctions.scala @@ -0,0 +1,22 @@ +package io.joern.scanners.c + +import io.joern.scanners._ +import io.shiftleft.semanticcpg.language._ + +object InsecureFunctions extends QueryBundle { + + @q + def getsUsed(): Query = Query( + name = "call-to-gets", + title = "Insecure function gets() used", + description = + """ + | Avoid gets() function as it can lead to reads beyond buffer boundary and cause + | buffer overflows. Some secure alternatives are fgets() and gets_s(). 
+ |""".stripMargin, + score = 4, { cpg => + cpg.call("gets") + } + ) + +} diff --git a/src/main/scala/io/joern/scanners/c/vulnscan/IntegerTruncations.scala b/src/main/scala/io/joern/scanners/c/IntegerTruncations.scala similarity index 72% rename from src/main/scala/io/joern/scanners/c/vulnscan/IntegerTruncations.scala rename to src/main/scala/io/joern/scanners/c/IntegerTruncations.scala index e397558..a0bb57b 100644 --- a/src/main/scala/io/joern/scanners/c/vulnscan/IntegerTruncations.scala +++ b/src/main/scala/io/joern/scanners/c/IntegerTruncations.scala @@ -1,16 +1,19 @@ -package io.joern.scanners.c.vulnscan +package io.joern.scanners.c +import io.joern.scanners._ +import io.joern.scanners.{QueryBundle, q} import io.shiftleft.semanticcpg.language._ -import io.joern.scanners.language._ -object IntegerTruncations { +object IntegerTruncations extends QueryBundle { /** * Identify calls to `strlen` where return values are assigned * to variables of type `int`, potentially causing truncation * on 64 bit platforms. 
* */ + @q def strlenAssignmentTruncations(): Query = Query( + name = "strlen-truncation", title = "Truncation in assigment involving strlen call", description = "-", score = 2, { cpg => diff --git a/src/main/scala/io/joern/scanners/c/codequality/Metrics.scala b/src/main/scala/io/joern/scanners/c/Metrics.scala similarity index 69% rename from src/main/scala/io/joern/scanners/c/codequality/Metrics.scala rename to src/main/scala/io/joern/scanners/c/Metrics.scala index bc359c4..a604567 100644 --- a/src/main/scala/io/joern/scanners/c/codequality/Metrics.scala +++ b/src/main/scala/io/joern/scanners/c/Metrics.scala @@ -1,61 +1,73 @@ -package io.joern.scanners.c.codequality +package io.joern.scanners.c -import io.joern.scanners.language._ +import io.joern.scanners._ import io.shiftleft.semanticcpg.language._ -object Metrics { +object Metrics extends QueryBundle { + @q def tooManyParameters(n: Int = 4): Query = Query( + name = "too-many-params", title = s"Number of parameters larger than $n", description = s"This query identifies functions with more than $n formal parameters", score = 2.0, { cpg => - cpg.method.filter(_.parameter.size > n) + cpg.method.internal.filter(_.parameter.size > n) } ) + @q def tooHighComplexity(n: Int = 4): Query = Query( + name = "too-high-complexity", title = s"Cyclomatic complexity higher than $n", description = s"This query identifies functions with a cyclomatic complexity higher than $n", score = 2.0, { cpg => - cpg.method.filter(_.controlStructure.size > n) + cpg.method.internal.filter(_.controlStructure.size > n) } ) + @q def tooLong(n: Int = 1000): Query = Query( + name = "too-long", title = s"More than $n lines", description = s"This query identifies functions that are more than $n lines long", score = 2.0, { cpg => - cpg.method.filter(_.numberOfLines > n) + cpg.method.internal.filter(_.numberOfLines > n) } ) + @q def multipleReturns(): Query = Query( + name = "multiple-returns", title = s"Multiple returns", description = "This query 
identifies functions with more than one return", score = 2.0, { cpg => - cpg.method.filter(_.ast.isReturn.l.size > 1) + cpg.method.internal.filter(_.ast.isReturn.l.size > 1) } ) + @q def tooManyLoops(n: Int = 4): Query = Query( + name = "too-many-loops", title = s"More than $n loops", description = s"This query identifies functions with more than $n loops", score = 2, { cpg => - cpg.method + cpg.method.internal .filter( _.ast.isControlStructure.parserTypeName("(For|Do|While).*").size > n) } ) + @q def tooNested(n: Int = 3): Query = Query( + name = "too-nested", title = s"Nesting level higher than $n", description = s"This query identifies functions with a nesting level higher than $n", score = 2, { cpg => - cpg.method.filter(_.depth(_.isControlStructure) > n) + cpg.method.internal.filter(_.depth(_.isControlStructure) > n) } ) diff --git a/src/main/scala/io/joern/scanners/c/codequality/CodeQualityScanner.scala b/src/main/scala/io/joern/scanners/c/codequality/CodeQualityScanner.scala deleted file mode 100644 index bb3d29e..0000000 --- a/src/main/scala/io/joern/scanners/c/codequality/CodeQualityScanner.scala +++ /dev/null @@ -1,54 +0,0 @@ -package io.joern.scanners.c.codequality - -import io.joern.scanners.language._ -import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.passes.{CpgPass, DiffGraph} -import io.shiftleft.semanticcpg.layers.{ - LayerCreator, - LayerCreatorContext, - LayerCreatorOptions -} - -/** - * Joern requires each extension to provide a class derived from `LayerCreator` - * and an associated companion object that provides the extension's name, description - * and a method to retrieve its default options. 
- * */ -object CodeQualityScanner { - val overlayName = "c-quality-scanner" - val description = "Code quality scanner for C code" - def defaultOpts = new CodeQualityScannerOptions() -} - -class CodeQualityScannerOptions() extends LayerCreatorOptions {} - -class CodeQualityScanner(options: CodeQualityScannerOptions) - extends LayerCreator { - override val overlayName: String = CodeQualityScanner.overlayName - override val description: String = CodeQualityScanner.description - - /** - * This method is called when the scanner is started - * */ - override def create(context: LayerCreatorContext, - storeUndoInfo: Boolean): Unit = { - runPass(new CodeQualityPass(context.cpg), context, storeUndoInfo) - outputFindings(context.cpg) - } -} - -class CodeQualityPass(cpg: Cpg) extends CpgPass(cpg) { - import Metrics._ - - /** - * All we do here is call all queries and add a node to - * the graph for each result. - * */ - override def run(): Iterator[DiffGraph] = { - val diffGraph = DiffGraph.newBuilder - (tooManyParameters()(cpg) ++ tooManyLoops()(cpg) ++ tooNested()(cpg) ++ - tooLong()(cpg) ++ tooHighComplexity()(cpg) ++ multipleReturns()(cpg)) - .foreach(diffGraph.addNode) - Iterator(diffGraph.build) - } -} diff --git a/src/main/scala/io/joern/scanners/c/vulnscan/CVulnScanner.scala b/src/main/scala/io/joern/scanners/c/vulnscan/CVulnScanner.scala deleted file mode 100644 index 7fa055f..0000000 --- a/src/main/scala/io/joern/scanners/c/vulnscan/CVulnScanner.scala +++ /dev/null @@ -1,47 +0,0 @@ -package io.joern.scanners.c.vulnscan - -import io.joern.scanners.language._ -import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.dataflowengineoss.queryengine.EngineContext -import io.shiftleft.passes.{CpgPass, DiffGraph} -import io.shiftleft.semanticcpg.layers.{ - LayerCreator, - LayerCreatorContext, - LayerCreatorOptions -} - -object CScanner { - val overlayName = "c-vuln-scan" - val description = "Vulnerability scanner for C code" - def defaultOpts = new CScannerOptions() -} 
- -class CScannerOptions() extends LayerCreatorOptions {} - -class CScanner(options: CScannerOptions)(implicit engineContext: EngineContext) - extends LayerCreator { - override val overlayName: String = CScanner.overlayName - override val description: String = CScanner.description - - override def create(context: LayerCreatorContext, - storeUndoInfo: Boolean): Unit = { - runPass(new CScannerPass(context.cpg), context, storeUndoInfo) - outputFindings(context.cpg) - } -} - -class CScannerPass(cpg: Cpg)(implicit engineContext: EngineContext) - extends CpgPass(cpg) { - override def run(): Iterator[DiffGraph] = { - val diffGraph = DiffGraph.newBuilder - IntegerTruncations - .strlenAssignmentTruncations()(cpg) - .foreach(diffGraph.addNode) - - val x = HeapBasedOverflow.mallocMemcpyIntOverflow() - x(cpg).foreach(diffGraph.addNode) - CopyLoops.isCopyLoop()(cpg).foreach(diffGraph.addNode) - InsecureFunctions.getsUsed()(cpg).foreach(diffGraph.addNode) - Iterator(diffGraph.build) - } -} diff --git a/src/main/scala/io/joern/scanners/c/vulnscan/InsecureFunctions.scala b/src/main/scala/io/joern/scanners/c/vulnscan/InsecureFunctions.scala deleted file mode 100644 index 5671fc9..0000000 --- a/src/main/scala/io/joern/scanners/c/vulnscan/InsecureFunctions.scala +++ /dev/null @@ -1,20 +0,0 @@ -package io.joern.scanners.c.vulnscan - -import io.joern.scanners.language._ -import io.shiftleft.semanticcpg.language._ -import io.shiftleft.dataflowengineoss.language._ -import io.shiftleft.dataflowengineoss.queryengine.EngineContext - -object InsecureFunctions { - - def getsUsed(): Query = Query( - title = "Insecure function gets() used", - description = - "Avoid gets() function as it can lead to reads beyond buffer boundary and cause buffer overlfows. 
Some secure alternatives are fgets() and gets_s()", - score = 4, { cpg => - cpg - .call("gets") - } - ) - -} diff --git a/src/main/scala/io/joern/scanners/c/vulnscan/SampleQuerySet.scala b/src/main/scala/io/joern/scanners/c/vulnscan/SampleQuerySet.scala deleted file mode 100644 index 77bdbc9..0000000 --- a/src/main/scala/io/joern/scanners/c/vulnscan/SampleQuerySet.scala +++ /dev/null @@ -1,31 +0,0 @@ -package io.joern.scanners.c.vulnscan - -import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.nodes -import io.shiftleft.passes.{CpgPass, DiffGraph} - -object SampleQuerySet { - - def myQuery1(cpg: Cpg): List[nodes.NewFinding] = { - ??? - } - - def myQuery2(cpg: Cpg): List[nodes.NewFinding] = { - ??? - } - // ... -} - -class SampleQuertSet(cpg: Cpg) extends CpgPass(cpg) { - - import SampleQuerySet._ - - override def run(): Iterator[DiffGraph] = { - val diffGraph = DiffGraph.newBuilder - myQuery1(cpg).foreach(diffGraph.addNode) - myQuery2(cpg).foreach(diffGraph.addNode) - // ... 
- Iterator(diffGraph.build) - } - -} diff --git a/src/main/scala/io/joern/scanners/language/ScannerStarters.scala b/src/main/scala/io/joern/scanners/language/ScannerStarters.scala deleted file mode 100644 index 8083106..0000000 --- a/src/main/scala/io/joern/scanners/language/ScannerStarters.scala +++ /dev/null @@ -1,12 +0,0 @@ -package io.joern.scanners.language - -import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.{NodeTypes, nodes} -import overflowdb.traversal._ - -class ScannerStarters(val cpg: Cpg) extends AnyVal { - - def finding: Traversal[nodes.Finding] = - cpg.graph.nodes(NodeTypes.FINDING).cast[nodes.Finding] - -} diff --git a/src/main/scala/io/joern/scanners/language/package.scala b/src/main/scala/io/joern/scanners/package.scala similarity index 84% rename from src/main/scala/io/joern/scanners/language/package.scala rename to src/main/scala/io/joern/scanners/package.scala index de386d2..e31bce3 100644 --- a/src/main/scala/io/joern/scanners/language/package.scala +++ b/src/main/scala/io/joern/scanners/package.scala @@ -1,13 +1,23 @@ -package io.joern.scanners +package io.joern import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.nodes -import overflowdb.traversal._ +import io.shiftleft.codepropertygraph.generated.{NodeTypes, nodes} +import overflowdb.traversal.Traversal import io.shiftleft.semanticcpg.language._ -package object language { +import overflowdb.traversal._ + +package object scanners { + + implicit class ScannerStarters(val cpg: Cpg) extends AnyVal { - case class Query(title: String, + def finding: Traversal[nodes.Finding] = + cpg.graph.nodes(NodeTypes.FINDING).cast[nodes.Finding] + + } + + case class Query(name: String, + title: String, description: String, score: Double, f: Cpg => Traversal[nodes.StoredNode]) { @@ -21,9 +31,6 @@ package object language { } } - implicit def toScannerStarters(cpg: Cpg): ScannerStarters = - new ScannerStarters(cpg) - object FindingKeys 
{ val title = "title" val description = "description" diff --git a/src/test/scala/io/joern/scanners/c/vulnscan/CopyLoopTests.scala b/src/test/scala/io/joern/scanners/c/CopyLoopTests.scala similarity index 88% rename from src/test/scala/io/joern/scanners/c/vulnscan/CopyLoopTests.scala rename to src/test/scala/io/joern/scanners/c/CopyLoopTests.scala index a3b5c49..73b5342 100644 --- a/src/test/scala/io/joern/scanners/c/vulnscan/CopyLoopTests.scala +++ b/src/test/scala/io/joern/scanners/c/CopyLoopTests.scala @@ -1,7 +1,5 @@ -package io.joern.scanners.c.vulnscan +package io.joern.scanners.c -import io.joern.scanners.c.Suite -import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.nodes import io.shiftleft.semanticcpg.language._ diff --git a/src/test/scala/io/joern/scanners/c/vulnscan/HeapBasedOverflowTests.scala b/src/test/scala/io/joern/scanners/c/HeapBasedOverflowTests.scala similarity index 91% rename from src/test/scala/io/joern/scanners/c/vulnscan/HeapBasedOverflowTests.scala rename to src/test/scala/io/joern/scanners/c/HeapBasedOverflowTests.scala index d7e432b..e979bd2 100644 --- a/src/test/scala/io/joern/scanners/c/vulnscan/HeapBasedOverflowTests.scala +++ b/src/test/scala/io/joern/scanners/c/HeapBasedOverflowTests.scala @@ -1,6 +1,5 @@ -package io.joern.scanners.c.vulnscan +package io.joern.scanners.c -import io.joern.scanners.c.Suite import io.shiftleft.codepropertygraph.generated.nodes class HeapBasedOverflowTests extends Suite { diff --git a/src/test/scala/io/joern/scanners/c/vulnscan/InsecureFunctionsTests.scala b/src/test/scala/io/joern/scanners/c/InsecureFunctionsTests.scala similarity index 87% rename from src/test/scala/io/joern/scanners/c/vulnscan/InsecureFunctionsTests.scala rename to src/test/scala/io/joern/scanners/c/InsecureFunctionsTests.scala index 40d16d8..58c6095 100644 --- a/src/test/scala/io/joern/scanners/c/vulnscan/InsecureFunctionsTests.scala +++ 
b/src/test/scala/io/joern/scanners/c/InsecureFunctionsTests.scala @@ -1,7 +1,5 @@ -package io.joern.scanners.c.vulnscan +package io.joern.scanners.c -import io.joern.scanners.c.Suite -import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.nodes import io.shiftleft.semanticcpg.language._ diff --git a/src/test/scala/io/joern/scanners/c/vulnscan/IntegerTruncationsTests.scala b/src/test/scala/io/joern/scanners/c/IntegerTruncationsTests.scala similarity index 91% rename from src/test/scala/io/joern/scanners/c/vulnscan/IntegerTruncationsTests.scala rename to src/test/scala/io/joern/scanners/c/IntegerTruncationsTests.scala index e9bc87b..806ddcf 100644 --- a/src/test/scala/io/joern/scanners/c/vulnscan/IntegerTruncationsTests.scala +++ b/src/test/scala/io/joern/scanners/c/IntegerTruncationsTests.scala @@ -1,5 +1,5 @@ -package io.joern.scanners.c.vulnscan -import io.joern.scanners.c.Suite +package io.joern.scanners.c + import io.shiftleft.codepropertygraph.generated.nodes import io.shiftleft.semanticcpg.language._ diff --git a/src/test/scala/io/joern/scanners/c/codequality/MetricsTests.scala b/src/test/scala/io/joern/scanners/c/MetricsTests.scala similarity index 97% rename from src/test/scala/io/joern/scanners/c/codequality/MetricsTests.scala rename to src/test/scala/io/joern/scanners/c/MetricsTests.scala index ee9e35f..623b148 100644 --- a/src/test/scala/io/joern/scanners/c/codequality/MetricsTests.scala +++ b/src/test/scala/io/joern/scanners/c/MetricsTests.scala @@ -1,6 +1,5 @@ -package io.joern.scanners.c.codequality +package io.joern.scanners.c -import io.joern.scanners.c.Suite import io.shiftleft.codepropertygraph.generated.nodes class MetricsTests extends Suite { diff --git a/src/test/scala/io/joern/scanners/c/QueryDatabaseTests.scala b/src/test/scala/io/joern/scanners/c/QueryDatabaseTests.scala new file mode 100644 index 0000000..ad8762e --- /dev/null +++ b/src/test/scala/io/joern/scanners/c/QueryDatabaseTests.scala @@ -0,0 +1,28 
@@ +package io.joern.scanners.c + +import io.joern.scanners.QueryDatabase + +class QueryDatabaseTests extends Suite { + + "QueryDatabase" should { + + "contain Metrics bundle" in { + new QueryDatabase().allBundles.count { bundle => + bundle.getName == "io.joern.scanners.c.Metrics$" + } shouldBe 1 + } + + "contain `tooManyParameters` query" in { + val qdb = new QueryDatabase() + val metricsBundles = qdb.allBundles.filter { bundle => + bundle.getName == "io.joern.scanners.c.Metrics$" + } + metricsBundles.size shouldBe 1 + val metricsBundle = metricsBundles.head + val queries = qdb.queriesInBundle(metricsBundle) + queries.count(_.title == s"Number of parameters larger than 4") shouldBe 1 + } + + } + +} diff --git a/src/test/scala/io/joern/scanners/c/ScanTests.scala b/src/test/scala/io/joern/scanners/c/ScanTests.scala new file mode 100644 index 0000000..a3e4be2 --- /dev/null +++ b/src/test/scala/io/joern/scanners/c/ScanTests.scala @@ -0,0 +1,15 @@ +package io.joern.scanners.c + +import io.joern.scanners.{JoernDefaultArgumentProvider, QueryDatabase} + +class ScanTests extends Suite { + + "Scan" should { + + "not crash when loading all queries" in { + new QueryDatabase(new JoernDefaultArgumentProvider()).allQueries.size should be > 0 + } + + } + +} diff --git a/src/test/scala/io/joern/scanners/c/vulnscan/SampleQuerySetTests.scala b/src/test/scala/io/joern/scanners/c/vulnscan/SampleQuerySetTests.scala deleted file mode 100644 index dae87c5..0000000 --- a/src/test/scala/io/joern/scanners/c/vulnscan/SampleQuerySetTests.scala +++ /dev/null @@ -1,16 +0,0 @@ -package io.joern.scanners.c.vulnscan - -import io.joern.scanners.c.Suite - -class SampleQuerySetTests extends Suite { - - override val code: String = - """ - void place_your_code_here() {} - """ - - "find ..." in { - // test code goes here - } - -}