From 3a14b5bc3824a2e6cf370748b679edbd459d3081 Mon Sep 17 00:00:00 2001 From: Adrien Piquerez Date: Thu, 11 Nov 2021 16:15:32 +0100 Subject: [PATCH 1/2] add metrics scheduler --- .../scaladex/server/service/Metrics.scala | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 server/src/main/scala/scaladex/server/service/Metrics.scala diff --git a/server/src/main/scala/scaladex/server/service/Metrics.scala b/server/src/main/scala/scaladex/server/service/Metrics.scala new file mode 100644 index 000000000..a83abf98e --- /dev/null +++ b/server/src/main/scala/scaladex/server/service/Metrics.scala @@ -0,0 +1,70 @@ +package scaladex.server.service + +import ch.epfl.scala.services.storage.sql.SqlRepo +import com.typesafe.scalalogging.LazyLogging +import java.time.Instant +import scala.concurrent.ExecutionContext +import scala.concurrent.Future +import ch.epfl.scala.services.storage.sql.tables.ReleaseTable + +class Metrics(db: SqlRepo)(implicit ec: ExecutionContext) extends LazyLogging { + def run(): Future[Unit] = { + logger.info(ReleaseTable.countProjects("3").sql) + def global = for { + artifacts <- db.countDistinctArtifacts() + releases <- db.countReleases() + projects <- db.countProjects() + githubInfos <- db.getAllGithubInfos() + scala210Projects <- db.countProjects("2.10") + scala211Projects <- db.countProjects("2.11") + scala212Projects <- db.countProjects("2.12") + scala213Projects <- db.countProjects("2.13") + scala3Projects <- db.countProjects("3") + } yield { + val contributors = githubInfos.flatMap(g => g.contributors).distinct.size + logger.info(s"Total artifacts: $artifacts") + logger.info(s"Total releases: $releases") + logger.info(s"Total projects: $projects") + logger.info(s"Total contributos: $contributors") + logger.info(s"Scala 2.10 projects: $scala210Projects") + logger.info(s"Scala 2.11 projects: $scala211Projects") + logger.info(s"Scala 2.12 projects: $scala212Projects") + logger.info(s"Scala 2.13 projects: $scala213Projects") + logger.info(s"Scala 3 projects: $scala3Projects") + } + + def yearly = { + def instant(year: Int) = Instant.parse(s"$year-01-01T00:00:00.00Z") + (2012 to 2021).foldLeft(Future.successful(())) { (f, year) => + val from = instant(year) + val to = instant(year + 1) + for { + _ <- f + projects <- db.countProjects(from, to) + } yield { + logger.info(s"New projects in $year: $projects") + } + } + } + + def monthly = { + def instant(month:Int, year: Int) = Instant.parse(s"$year-${String.format("%02d", month)}-01T00:00:00.00Z") + val months = for { + year <- 2012 to 2021 + month <- 1 to 12 + } yield { + val (nextYear, nextMonth) = if (month + 1 == 13) (year + 1, 1) else (year, month + 1) + () => db.countReleases(instant(month, year), instant(nextMonth, nextYear)).map { releases => + logger.info(s"Releases in $year-${String.format("%02d", month)}: $releases") + } + } + months.foldLeft(Future.successful(())) { (f, month) => f.flatMap(_=> month()) } + } + + for { + _ <- global + _ <- yearly + _ <- monthly + } yield () + } +} From f577efc39c35f4412f584d6019542cffab7ff493 Mon Sep 17 00:00:00 2001 From: Adrien Piquerez Date: Mon, 11 Apr 2022 10:00:09 +0200 Subject: [PATCH 2/2] Update metrics scheduler --- .../scala/scaladex/infra/SqlDatabase.scala | 13 ++++ .../scaladex/infra/sql/ArtifactTable.scala | 8 +++ .../scaladex/infra/sql/ProjectTable.scala | 7 ++ .../server/service/AdminService.scala | 7 +- .../scaladex/server/service/Metrics.scala | 44 ++++++++++++ .../src/main/scala/scaladex/view/Job.scala | 5 ++ .../scaladex/server/service/Metrics.scala | 70 ------------------- 7 files changed, 81 insertions(+), 73 deletions(-) create mode 100644 modules/server/src/main/scala/scaladex/server/service/Metrics.scala delete mode 100644 server/src/main/scala/scaladex/server/service/Metrics.scala diff --git a/modules/infra/src/main/scala/scaladex/infra/SqlDatabase.scala b/modules/infra/src/main/scala/scaladex/infra/SqlDatabase.scala index 9301a5c37..00c62422d 100644 --- a/modules/infra/src/main/scala/scaladex/infra/SqlDatabase.scala +++ b/modules/infra/src/main/scala/scaladex/infra/SqlDatabase.scala @@ -1,6 +1,8 @@ package scaladex.infra import java.time.Instant +import java.time.OffsetDateTime +import java.time.ZoneOffset import java.util.UUID import scala.concurrent.ExecutionContext.Implicits.global @@ -33,6 +35,7 @@ import scaladex.infra.sql.ProjectTable import scaladex.infra.sql.ReleaseDependenciesTable import scaladex.infra.sql.ReleaseTable import scaladex.infra.sql.UserSessionsTable +import scaladex.core.model.BinaryVersion class SqlDatabase(datasource: HikariDataSource, xa: doobie.Transactor[IO]) extends SchedulerDatabase with LazyLogging { private val flyway = DoobieUtils.flyway(datasource) @@ -150,6 +153,16 @@ class SqlDatabase(datasource: HikariDataSource, xa: doobie.Transactor[IO]) exten def countProjects(): Future[Long] = run(ProjectTable.countProjects.unique) + def countProjects(year: Int): Future[Long] = { + val instant = OffsetDateTime.of(year + 1, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant + run(ProjectTable.countProjectsUntil.unique(instant)) + } + + def getProjectsByYear(year: Int): Future[Seq[(Project.Reference, Language)]] = { + val instant = OffsetDateTime.of(year + 1, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant + run(ArtifactTable.selectProjectsUntil.to[Seq](instant)) + } + override def countArtifacts(): Future[Long] = run(ArtifactTable.count.unique) diff --git a/modules/infra/src/main/scala/scaladex/infra/sql/ArtifactTable.scala b/modules/infra/src/main/scala/scaladex/infra/sql/ArtifactTable.scala index f531e7c74..623a1e36b 100644 --- a/modules/infra/src/main/scala/scaladex/infra/sql/ArtifactTable.scala +++ b/modules/infra/src/main/scala/scaladex/infra/sql/ArtifactTable.scala @@ -160,4 +160,12 @@ object ArtifactTable { groupBy = Seq("group_id", "artifact_id") ) } + + val selectProjectsUntil: Query[Instant, (Project.Reference, Language)] = + selectRequest1[Instant, (Project.Reference, Language)]( + table, + Seq("organization", "repository", "language_version"), + where = Seq("release_date < ?"), + groupBy = Seq("organization", "repository", "language_version") + ) } diff --git a/modules/infra/src/main/scala/scaladex/infra/sql/ProjectTable.scala b/modules/infra/src/main/scala/scaladex/infra/sql/ProjectTable.scala index 31c6c11c6..dab9a3c20 100644 --- a/modules/infra/src/main/scala/scaladex/infra/sql/ProjectTable.scala +++ b/modules/infra/src/main/scala/scaladex/infra/sql/ProjectTable.scala @@ -45,6 +45,13 @@ object ProjectTable { val countProjects: Query0[Long] = selectRequest(table, Seq("count(*)")) + val countProjectsUntil: Query[Instant, Long] = + selectRequest1( + table, + Seq("count(*)"), + where = Seq("creation_date < ?", "github_status!='Moved'", "github_status!='NotFound'") + ) + val selectByReference: Query[Project.Reference, Project] = selectRequest(fullTable, allFields, referenceFields.map(f => s"p.$f")) diff --git a/modules/server/src/main/scala/scaladex/server/service/AdminService.scala b/modules/server/src/main/scala/scaladex/server/service/AdminService.scala index 6acd34d91..7ac529012 100644 --- a/modules/server/src/main/scala/scaladex/server/service/AdminService.scala +++ b/modules/server/src/main/scala/scaladex/server/service/AdminService.scala @@ -12,15 +12,15 @@ import scaladex.core.model.Project import scaladex.core.model.Project.Settings import scaladex.core.model.UserState import scaladex.core.service.GithubClient -import scaladex.core.service.SchedulerDatabase import scaladex.core.service.SearchEngine import scaladex.core.util.ScalaExtensions._ +import scaladex.infra.SqlDatabase import scaladex.view.Job import scaladex.view.Task class AdminService( env: Env, - database: SchedulerDatabase, + database: SqlDatabase, searchEngine: SearchEngine, githubClientOpt: Option[GithubClient], sonatypeSynchronizer: SonatypeService @@ -39,7 +39,8 @@ class AdminService( new JobScheduler(Job.projectDependencies, projectDependenciesUpdater.updateAll), new JobScheduler(Job.projectCreationDates, updateProjectCreationDate), new JobScheduler(Job.moveArtifacts, artifactsService.moveAll), - new JobScheduler(Job.userSessions, userSessionService.updateAll) + new JobScheduler(Job.userSessions, userSessionService.updateAll), + new JobScheduler(Job.metrics, (new Metrics(database)).run) ) ++ githubClientOpt.map { client => val githubUpdater = new GithubUpdater(database, client) diff --git a/modules/server/src/main/scala/scaladex/server/service/Metrics.scala b/modules/server/src/main/scala/scaladex/server/service/Metrics.scala new file mode 100644 index 000000000..12d367fab --- /dev/null +++ b/modules/server/src/main/scala/scaladex/server/service/Metrics.scala @@ -0,0 +1,44 @@ +package scaladex.server.service + +import scala.concurrent.ExecutionContext +import scala.concurrent.Future + +import cats.implicits.toTraverseOps +import com.typesafe.scalalogging.LazyLogging +import scaladex.infra.SqlDatabase +import scaladex.core.model.Scala + +class Metrics(db: SqlDatabase)(implicit ec: ExecutionContext) extends LazyLogging { + def run(): Future[String] = { + val years: Seq[Int] = Range.inclusive(2013, 2022) + for { + projectsByYear <- years.traverse(db.getProjectsByYear) + projects <- db.getAllProjects() + } yield { + val projectMap = projects.map(p => p.reference -> p).toMap + years.zip(projectsByYear).foreach { case (year, projects) => + logger.info(s"$year:") + val all = projects + .groupMap(_._1)(_._2) + .view + .filterKeys(k => projectMap.get(k).filter(p => !p.githubStatus.isMoved && !p.githubStatus.isNotFound).nonEmpty) + .values.map(_.toSet).toSeq + val scala3 = all.count(_.contains(Scala.`3`)) + val scala213 = all.count(ls => ls.contains(Scala.`2.13`) && !ls.contains(Scala.`3`)) + val scala212 = all.count(ls => ls.contains(Scala.`2.12`) && !ls.contains(Scala.`2.13`) && !ls.contains(Scala.`3`)) + val scala211 = all.count(ls => ls.contains(Scala.`2.11`) && !ls.contains(Scala.`2.12`) && !ls.contains(Scala.`2.13`) && !ls.contains(Scala.`3`)) + val scala210 = all.count(ls => ls.contains(Scala.`2.10`) && !ls.contains(Scala.`2.11`) && !ls.contains(Scala.`2.12`) && !ls.contains(Scala.`2.13`) && !ls.contains(Scala.`3`)) + logger.info(s" Scala 2.10: $scala210") + logger.info(s" Scala 2.11: $scala211") + logger.info(s" Scala 2.12: $scala212") + logger.info(s" Scala 2.13: $scala213") + logger.info(s" Scala 3: $scala3") + } + val filteredProjects = projects.filter(p => !p.githubStatus.isMoved && !p.githubStatus.isNotFound) + logger.info(s"total projects: ${filteredProjects.size}") + val contributors = filteredProjects.flatMap(_.githubInfo).flatMap(_.contributors).map(_.login).distinct.size + logger.info(s"total contributors: $contributors") + "Success" + } + } +} diff --git a/modules/template/src/main/scala/scaladex/view/Job.scala b/modules/template/src/main/scala/scaladex/view/Job.scala index 20ae3f03f..64df86675 100644 --- a/modules/template/src/main/scala/scaladex/view/Job.scala +++ b/modules/template/src/main/scala/scaladex/view/Job.scala @@ -44,6 +44,11 @@ object Job { "Find missing artifacts in Maven Central of the known group IDs.", 24.hours ) + val metrics: Job = Job( + "metrics", + "Print regular metrics into the logs", + 24.hours + ) case class Status(state: State, results: Seq[Result], progress: Option[Progress]) { def isStarted: Boolean = state.isInstanceOf[Started] diff --git a/server/src/main/scala/scaladex/server/service/Metrics.scala b/server/src/main/scala/scaladex/server/service/Metrics.scala deleted file mode 100644 index a83abf98e..000000000 --- a/server/src/main/scala/scaladex/server/service/Metrics.scala +++ /dev/null @@ -1,70 +0,0 @@ -package scaladex.server.service - -import ch.epfl.scala.services.storage.sql.SqlRepo -import com.typesafe.scalalogging.LazyLogging -import java.time.Instant -import scala.concurrent.ExecutionContext -import scala.concurrent.Future -import ch.epfl.scala.services.storage.sql.tables.ReleaseTable - -class Metrics(db: SqlRepo)(implicit ec: ExecutionContext) extends LazyLogging { - def run(): Future[Unit] = { - logger.info(ReleaseTable.countProjects("3").sql) - def global = for { - artifacts <- db.countDistinctArtifacts() - releases <- db.countReleases() - projects <- db.countProjects() - githubInfos <- db.getAllGithubInfos() - scala210Projects <- db.countProjects("2.10") - scala211Projects <- db.countProjects("2.11") - scala212Projects <- db.countProjects("2.12") - scala213Projects <- db.countProjects("2.13") - scala3Projects <- db.countProjects("3") - } yield { - val contributors = githubInfos.flatMap(g => g.contributors).distinct.size - logger.info(s"Total artifacts: $artifacts") - logger.info(s"Total releases: $releases") - logger.info(s"Total projects: $projects") - logger.info(s"Total contributos: $contributors") - logger.info(s"Scala 2.10 projects: $scala210Projects") - logger.info(s"Scala 2.11 projects: $scala211Projects") - logger.info(s"Scala 2.12 projects: $scala212Projects") - logger.info(s"Scala 2.13 projects: $scala213Projects") - logger.info(s"Scala 3 projects: $scala3Projects") - } - - def yearly = { - def instant(year: Int) = Instant.parse(s"$year-01-01T00:00:00.00Z") - (2012 to 2021).foldLeft(Future.successful(())) { (f, year) => - val from = instant(year) - val to = instant(year + 1) - for { - _ <- f - projects <- db.countProjects(from, to) - } yield { - logger.info(s"New projects in $year: $projects") - } - } - } - - def monthly = { - def instant(month:Int, year: Int) = Instant.parse(s"$year-${String.format("%02d", month)}-01T00:00:00.00Z") - val months = for { - year <- 2012 to 2021 - month <- 1 to 12 - } yield { - val (nextYear, nextMonth) = if (month + 1 == 13) (year + 1, 1) else (year, month + 1) - () => db.countReleases(instant(month, year), instant(nextMonth, nextYear)).map { releases => - logger.info(s"Releases in $year-${String.format("%02d", month)}: $releases") - } - } - months.foldLeft(Future.successful(())) { (f, month) => f.flatMap(_=> month()) } - } - - for { - _ <- global - _ <- yearly - _ <- monthly - } yield () - } -}