Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
from vulnerabilities.pipelines import remove_duplicate_advisories
from vulnerabilities.pipelines.v2_improvers import collect_ssvc_trees
from vulnerabilities.pipelines.v2_improvers import compute_advisory_content_hash
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
from vulnerabilities.pipelines.v2_improvers import (
Expand Down Expand Up @@ -74,5 +75,6 @@
compute_advisory_todo.ComputeToDo,
collect_ssvc_trees.CollectSSVCPipeline,
relate_severities.RelateSeveritiesPipeline,
compute_advisory_content_hash.ComputeAdvisoryContentHash,
]
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 5.2.11 on 2026-03-11 08:46

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated migration: adds the AdvisoryV2.advisory_content_hash
    # column used to identify advisories with identical normalized content.
    # max_length=64 matches a SHA-256 hex digest; blank/null allowed so the
    # column can be backfilled after deployment by the
    # compute_advisory_content_hash_v2 pipeline.

    dependencies = [
        ("vulnerabilities", "0115_impactedpackageaffecting_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="advisoryv2",
            name="advisory_content_hash",
            field=models.CharField(
                blank=True,
                help_text="A unique hash computed from the content of the advisory used to identify advisories with the same content.",
                max_length=64,
                null=True,
            ),
        ),
    ]
36 changes: 7 additions & 29 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3010,6 +3010,13 @@ class AdvisoryV2(models.Model):
help_text="Related advisories that are used to calculate the severity of this advisory.",
)

advisory_content_hash = models.CharField(
max_length=64,
blank=True,
null=True,
help_text="A unique hash computed from the content of the advisory used to identify advisories with the same content.",
)

@property
def risk_score(self):
"""
Expand Down Expand Up @@ -3078,35 +3085,6 @@ def get_aliases(self):
"""
return self.aliases.all()

def compute_advisory_content(self):
"""
Compute a unique content hash for an advisory by normalizing its data and hashing it.

:param advisory: An Advisory object
:return: SHA-256 hash digest as content hash
"""
normalized_data = {
"summary": normalize_text(self.summary),
"impacted_packages": sorted(
[impact.to_dict() for impact in self.impacted_packages.all()],
key=lambda x: json.dumps(x, sort_keys=True),
),
"patches": sorted(
[patch.to_patch_data().to_dict() for patch in self.patches.all()],
key=lambda x: json.dumps(x, sort_keys=True),
),
"severities": sorted(
[sev.to_vulnerability_severity_data().to_dict() for sev in self.severities.all()],
key=lambda x: (x.get("system"), x.get("value")),
),
"weaknesses": normalize_list([weakness.cwe_id for weakness in self.weaknesses.all()]),
}

normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True)
content_hash = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest()

return content_hash

alias = get_aliases


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ def load_advisories(

fixed_version_range = None
try:
fixed_version_range = AlpineLinuxVersionRange.from_versions([version])
if version:
fixed_version_range = AlpineLinuxVersionRange.from_versions([version])
except InvalidVersion as e:
logger(
f"{version!r} is not a valid AlpineVersion {e!r}",
Expand Down
15 changes: 8 additions & 7 deletions vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,19 +330,20 @@ def to_version_ranges(self, versions_data, fixed_versions):
"=": "=",
}
comparator = comparator_by_range_expression.get(range_expression)
if comparator:
if comparator and version_value and version_value not in self.ignorable_versions:
constraints.append(
VersionConstraint(comparator=comparator, version=SemverVersion(version_value))
)

for fixed_version in fixed_versions:
# The VersionConstraint method `invert()` inverts the fixed_version's comparator,
# enabling inclusion of multiple fixed versions with the `affected_version_range` values.
constraints.append(
VersionConstraint(
comparator="=",
version=SemverVersion(fixed_version),
).invert()
)
if fixed_version and fixed_version not in self.ignorable_versions:
constraints.append(
VersionConstraint(
comparator="=",
version=SemverVersion(fixed_version),
).invert()
)

return ApacheVersionRange(constraints=constraints)
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
spdx_license_expression = "CC0-1.0"
license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt"
repo_url = "git+https://github.com/dependabot/elixir-security-advisories"
run_once = True

precedence = 200

Expand Down
4 changes: 3 additions & 1 deletion vulnerabilities/pipelines/v2_importers/gitlab_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def parse_gitlab_advisory(
original_advisory_text=json.dumps(gitlab_advisory, indent=2, ensure_ascii=False),
)
affected_version_range = None
fixed_version_range = None
fixed_versions = gitlab_advisory.get("fixed_versions") or []
affected_range = gitlab_advisory.get("affected_range")
gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"])
Expand Down Expand Up @@ -285,7 +286,8 @@ def parse_gitlab_advisory(
if affected_version_range:
vrc = affected_version_range.__class__

fixed_version_range = vrc.from_versions(parsed_fixed_versions)
if parsed_fixed_versions:
fixed_version_range = vrc.from_versions(parsed_fixed_versions)
if not fixed_version_range and not affected_version_range:
return

Expand Down
4 changes: 3 additions & 1 deletion vulnerabilities/pipelines/v2_importers/ruby_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ def get_affected_packages(record, purl):
affected_packages = []
for unaffected_version in record.get("unaffected_versions", []):
try:
affected_version_range = GemVersionRange.from_native(unaffected_version).invert()
if unaffected_version:
unaffected_version = unaffected_version.strip()
affected_version_range = GemVersionRange.from_native(unaffected_version).invert()
validate_comparators(affected_version_range.constraints)
affected_packages.append(
AffectedPackageV2(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#


from aboutcode.pipeline import LoopProgress

from vulnerabilities.models import AdvisoryV2
from vulnerabilities.pipelines import VulnerableCodePipeline
from vulnerabilities.utils import compute_advisory_content


class ComputeAdvisoryContentHash(VulnerableCodePipeline):
    """Compute and persist a content hash for AdvisoryV2 records missing one."""

    pipeline_id = "compute_advisory_content_hash_v2"

    @classmethod
    def steps(cls):
        return (cls.compute_advisory_content_hash,)

    def compute_advisory_content_hash(self):
        """
        Backfill ``advisory_content_hash`` for advisories where it is NULL.

        The hash is a SHA-256 digest of the advisory's normalized content
        (see ``compute_advisory_content``) and is used to identify advisories
        with the same content.
        """
        advisories = AdvisoryV2.objects.filter(advisory_content_hash__isnull=True)

        advisories_count = advisories.count()

        # NOTE: the previous docstring and log text were copy-pasted from the
        # ComputeToDo pipeline and described the wrong task.
        self.log(f"Computing advisory_content_hash for {advisories_count} Advisories")
        progress = LoopProgress(
            total_iterations=advisories_count,
            logger=self.log,
            progress_step=1,
        )

        to_update = []
        batch_size = 5000

        for advisory in progress.iter(advisories.iterator(chunk_size=batch_size)):
            advisory.advisory_content_hash = compute_advisory_content(advisory)
            to_update.append(advisory)

            # Flush in batches to bound memory usage on large tables.
            if len(to_update) >= batch_size:
                AdvisoryV2.objects.bulk_update(
                    to_update,
                    ["advisory_content_hash"],
                    batch_size=batch_size,
                )
                to_update.clear()

        # Flush the final partial batch, if any.
        if to_update:
            AdvisoryV2.objects.bulk_update(
                to_update,
                ["advisory_content_hash"],
                batch_size=batch_size,
            )

        self.log("Finished computing advisory_content_hash")
3 changes: 3 additions & 0 deletions vulnerabilities/pipes/advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from vulnerabilities.models import VulnerabilitySeverity
from vulnerabilities.models import Weakness
from vulnerabilities.pipes.univers_utils import get_exact_purls_v2
from vulnerabilities.utils import compute_advisory_content


def get_or_create_aliases(aliases: List) -> QuerySet:
Expand Down Expand Up @@ -301,6 +302,7 @@ def insert_advisory_v2(
advisory_obj = None
created = False
content_id = compute_content_id_v2(advisory_data=advisory)
advisory_content_hash = compute_advisory_content(advisory_data=advisory)
try:
default_data = {
"datasource_id": pipeline_id,
Expand All @@ -311,6 +313,7 @@ def insert_advisory_v2(
"original_advisory_text": advisory.original_advisory_text,
"url": advisory.url,
"precedence": precedence,
"advisory_content_hash": advisory_content_hash,
}

advisory_obj, created = AdvisoryV2.objects.get_or_create(
Expand Down
33 changes: 32 additions & 1 deletion vulnerabilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,11 @@ def group_advisories_by_content(advisories):
grouped = {}

for advisory in advisories:
content_hash = advisory.compute_advisory_content()
content_hash = (
advisory.advisory_content_hash
if advisory.advisory_content_hash
else compute_advisory_content(advisory)
)

entry = grouped.setdefault(
content_hash,
Expand All @@ -867,3 +871,30 @@ def group_advisories_by_content(advisories):
entry["secondary"].add(advisory)

return grouped


def compute_advisory_content(advisory_data):
    """
    Return a SHA-256 hex digest uniquely identifying an advisory's content.

    Accepts either an AdvisoryData object or an AdvisoryV2 model instance
    (the latter is first converted with ``to_advisory_data()``). The advisory
    fields are normalized, serialized to canonical JSON, and hashed so that
    two advisories with the same content produce the same digest.
    """
    # Imported lazily to avoid a circular import between utils and models.
    from vulnerabilities.models import AdvisoryV2

    if isinstance(advisory_data, AdvisoryV2):
        advisory_data = advisory_data.to_advisory_data()

    affected = [pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg]
    severities = [sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev]
    patches = [patch.to_dict() for patch in normalize_list(advisory_data.patches)]

    # Canonical serialization: sorted keys and compact separators make the
    # JSON byte stream deterministic for hashing.
    canonical = json.dumps(
        {
            "summary": normalize_text(advisory_data.summary),
            "affected_packages": affected,
            "severities": severities,
            "weaknesses": normalize_list(advisory_data.weaknesses),
            "patches": patches,
        },
        separators=(",", ":"),
        sort_keys=True,
    )

    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
Loading