Skip to content

Commit

Permalink
Reference: #3954
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

Signed-off-by: Jono Yang <[email protected]>

Signed-off-by: Jono Yang <[email protected]>

Added support to parse labels in Dockerfile

Signed-off-by: Varsha U N <[email protected]>
  • Loading branch information
AyanSinhaMahapatra authored and VarshaUN committed Jan 11, 2025
1 parent bc78721 commit 610689c
Show file tree
Hide file tree
Showing 16 changed files with 297 additions and 5 deletions.
2 changes: 1 addition & 1 deletion requirements-linux.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
packagedcode-msitools==0.101.210706
regipy==3.1.0
rpm-inspector-rpm==4.16.1.3.210404
go-inspector==0.3.1
go-inspector==0.5.0
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jaraco.functools==4.1.0
javaproperties==0.8.1
Jinja2==3.1.3
jsonstreams==0.6.0
license-expression==30.3.0
license-expression==30.4.0
lxml==5.1.0
MarkupSafe==2.1.5
more-itertools==8.13.0
Expand Down
2 changes: 1 addition & 1 deletion setup-mini.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ packages =
rpm_inspector_rpm >= 4.16.1.3; platform_system == 'Linux'
regipy >= 3.1.0; platform_system == 'Linux'
packagedcode_msitools >= 0.101.210706; platform_system == 'Linux'
go-inspector >= 0.3.1; platform_system == 'Linux'
go-inspector >= 0.5.0; platform_system == 'Linux'


[options.entry_points]
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ install_requires =
javaproperties >= 0.5
jinja2 >= 2.7.0
jsonstreams >= 0.5.0
license_expression >= 30.1.1
license_expression >= 30.4.0
lxml >= 4.9.2
MarkupSafe >= 2.1.2
packageurl_python >= 0.9.0
Expand Down Expand Up @@ -149,7 +149,7 @@ packages =
rpm_inspector_rpm >= 4.16.1.3; platform_system == 'Linux'
regipy >= 3.1.0; platform_system == 'Linux'
packagedcode_msitools >= 0.101.210706; platform_system == 'Linux'
go-inspector >= 0.3.1; platform_system == 'Linux'
go-inspector >= 0.5.0; platform_system == 'Linux'


[options.entry_points]
Expand Down
1 change: 1 addition & 0 deletions src/licensedcode/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ def select_ngrams(ngrams, with_pos=False):
>>> list(select_ngrams(x for x in [(2, 1, 3), (1, 1, 3), (5, 1, 3), (2, 6, 1), (7, 3, 4)]))
[(2, 1, 3), (1, 1, 3), (5, 1, 3), (2, 6, 1), (7, 3, 4)]
"""
ngram = None
last = None
for pos, ngram in enumerate(ngrams):
# FIXME: use a proper hash
Expand Down
2 changes: 2 additions & 0 deletions src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from packagedcode import debian
from packagedcode import debian_copyright
from packagedcode import distro
from packagedcode import dockerfile
from packagedcode import conda
from packagedcode import conan
from packagedcode import cocoapods
Expand Down Expand Up @@ -97,6 +98,7 @@
debian.DebianSourcePackageTarballHandler,

distro.EtcOsReleaseHandler,
dockerfile.DockerfileHandler,

freebsd.CompactManifestHandler,

Expand Down
59 changes: 59 additions & 0 deletions src/packagedcode/dockerfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#



import io
from pathlib import Path
from dockerfile_parse import DockerfileParser
from packagedcode import models
from packagedcode import utils
import fnmatch


class DockerfileHandler(models.DatafileHandler):
    """
    Handle Dockerfile and Containerfile manifests and report the values of
    their LABEL instructions as package data.
    """
    datasource_id = 'dockerfile_oci_labels'

    # Lowercase glob patterns, matched against the lowercased file name so
    # that detection behaves the same on every platform.
    _filename_patterns = (
        'dockerfile',
        'containerfile',
        '*.dockerfile',
        '*.containerfile',
    )

    # Namespace used by the pre-defined OCI image annotation keys, e.g.
    # ``org.opencontainers.image.licenses``.
    _oci_label_prefix = 'org.opencontainers.image.'

    @classmethod
    def is_datafile(cls, path):
        """
        Return True if the file name of ``path`` looks like a Dockerfile or
        Containerfile (``Dockerfile``, ``Containerfile``, ``*.dockerfile`` or
        ``*.containerfile``, compared case-insensitively).
        """
        # Only the last path segment is matched, and only by name: the file
        # content is not inspected here.
        filename = Path(path).name.lower()
        return any(
            fnmatch.fnmatchcase(filename, pattern)
            for pattern in cls._filename_patterns
        )

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Parse the Dockerfile at ``location`` and yield a single PackageData
        built from its labels.

        The name, version and license are looked up first under their OCI
        annotation key (``org.opencontainers.image.title``, ``.version`` and
        ``.licenses``) and then under the bare key. A missing label yields
        ``None`` rather than a placeholder string.
        """
        labels = cls.extract_oci_labels_from_dockerfile(location)

        def get_label(*keys):
            # Return the first non-empty value found for any of ``keys``,
            # trying the OCI-prefixed key before the bare key.
            for key in keys:
                value = (
                    labels.get(f'{cls._oci_label_prefix}{key}')
                    or labels.get(key)
                )
                if value:
                    return value
            return None

        # NOTE(review): 'license_expression' and 'labels' are passed through
        # unchanged from the previous implementation; confirm that
        # models.PackageData accepts these field names.
        package_data = {
            'datasource_id': cls.datasource_id,
            'type': cls.default_package_type,
            'name': get_label('title', 'name'),
            'version': get_label('version'),
            'license_expression': get_label('licenses', 'license'),
            'labels': labels,
        }

        yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def extract_oci_labels_from_dockerfile(cls, dockerfile_path):
        """
        Return a plain dict of the LABEL key/value pairs declared in the
        Dockerfile at ``dockerfile_path``, as reported by DockerfileParser.
        Keys are returned as written in the Dockerfile.
        """
        # Read raw bytes and let the parser decode them: this avoids
        # depending on the locale's default text encoding.
        with open(dockerfile_path, 'rb') as dockerfile:
            content = dockerfile.read()

        # Parse from an in-memory file object. Assigning to ``parser.content``
        # on a parser created without a path would write the text to a
        # ``Dockerfile`` in the current working directory.
        parser = DockerfileParser(fileobj=io.BytesIO(content))

        # Copy into a plain dict so the caller gets a detached mapping.
        return dict(parser.labels)
8 changes: 8 additions & 0 deletions src/packagedcode/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,11 @@ def _parse(

if TRACE:
raise

except Exception as e:
# We should continue when an Exception has occurred while trying to
# recognize a package
if TRACE:
logger_debug(f'_parse: Exception: {str(e)}')

continue
Binary file not shown.
13 changes: 13 additions & 0 deletions tests/packagedcode/data/docker/containerfile-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[
{
"datasource_id": "dockerfile_oci_labels",
"type": "default",
"name": "Unknown",
"version": "Unknown",
"license_expression": "GPL-2.0-only AND BSD-2-Clause",
"labels": {
"source": "https://github.com/kubernetes-sigs/blixt",
"licenses": "GPL-2.0-only,BSD-2-Clause"
}
}
]
10 changes: 10 additions & 0 deletions tests/packagedcode/data/docker/psql-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"datasource_id": "dockerfile_oci_labels",
"type": "default",
"license_expression": "MIT",
"labels": {
"source": "https://github.com/kreneskyp/ix"
}
}
]
5 changes: 5 additions & 0 deletions tests/packagedcode/data/docker/psql.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
FROM postgres:15.3
LABEL org.opencontainers.image.source https://github.com/kreneskyp/ix

RUN apt update -y && \
apt install -y postgresql-15-pgvector \
18 changes: 18 additions & 0 deletions tests/packagedcode/data/docker/test-dockerfile-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"datasource_id": "dockerfile_oci_labels",
"type": "default",
"name": "Kanboard",
"version": "1.2.42",
"license_expression": "MIT",
"labels": {
"source": "https://github.com/kanboard/kanboard",
"title": "Kanboard",
"description": "Kanboard is project management software that focuses on the Kanban methodology",
"vendor": "Kanboard",
"licenses": "MIT",
"url": "https://kanboard.org",
"documentation": "https://docs.kanboard.org"
}
}
]
80 changes: 80 additions & 0 deletions tests/packagedcode/data/docker/test.containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#Copied from https://github.com/kubernetes-sigs/blixt/blob


FROM rust:1.79-slim-bookworm as builder

ARG TARGETARCH
ARG LLVM_VERSION=19

RUN apt-get update
RUN apt-get install --yes \
build-essential \
protobuf-compiler \
pkg-config \
musl-tools \
clang \
wget

RUN apt install --yes lsb-release software-properties-common gnupg
RUN wget -O /tmp/llvm.sh https://apt.llvm.org/llvm.sh
RUN chmod +x /tmp/llvm.sh
RUN /bin/sh -c "/tmp/llvm.sh ${LLVM_VERSION} all"

RUN rustup default stable
RUN rustup install nightly
RUN rustup component add rust-src --toolchain nightly
RUN --mount=type=cache,target=/root/.cargo/registry \
cargo install bpf-linker

WORKDIR /workspace
# Docker uses the amd64/arm64 convention while Rust uses the x86_64/aarch64 convention.
# Since Dockerfile doesn't support conditional variables (sigh), write the arch in Rust's
# convention to a file for later usage.
RUN if [ "$TARGETARCH" = "amd64" ]; \
then echo "x86_64" >> arch; \
else echo "aarch64" >> arch; \
fi
RUN rustup target add $(eval cat arch)-unknown-linux-musl

COPY dataplane dataplane
COPY tools/udp-test-server tools/udp-test-server
COPY xtask xtask
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock
COPY .cargo .cargo

# We need to tell bpf-linker where it can find LLVM's shared library file.
# Ref: https://github.com/aya-rs/rustc-llvm-proxy/blob/cbcb3c6/src/lib.rs#L48
ENV LD_LIBRARY_PATH="/usr/lib/llvm-$LLVM_VERSION/lib"
ENV CC_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang"
ENV AR_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar"
ENV CC_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang"
ENV AR_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar"
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_RUSTFLAGS="-Clink-self-contained=yes -Clinker=rust-lld"

RUN --mount=type=cache,target=/workspace/target/ \
--mount=type=cache,target=/root/.cargo/registry \
cargo xtask build-ebpf --release
RUN --mount=type=cache,target=/workspace/target/ \
--mount=type=cache,target=/root/.cargo/registry \
RUSTFLAGS=-Ctarget-feature=+crt-static cargo build \
--workspace \
--exclude ebpf \
--release \
--target=$(eval cat arch)-unknown-linux-musl
RUN --mount=type=cache,target=/workspace/target/ \
cp /workspace/target/$(eval cat arch)-unknown-linux-musl/release/loader /workspace/dataplane-release

FROM alpine

LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt
LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause

WORKDIR /opt/blixt/

COPY --from=builder /workspace/dataplane-release /opt/blixt/dataplane

COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0
COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause

ENTRYPOINT ["/opt/blixt/dataplane"]
36 changes: 36 additions & 0 deletions tests/packagedcode/data/docker/test.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#Copied from https://github.com/kanboard/kanboard

FROM alpine:3.21

LABEL org.opencontainers.image.source https://github.com/kanboard/kanboard
LABEL org.opencontainers.image.title=Kanboard
LABEL org.opencontainers.image.description="Kanboard is project management software that focuses on the Kanban methodology"
LABEL org.opencontainers.image.vendor=Kanboard
LABEL org.opencontainers.image.licenses=MIT
LABEL org.opencontainers.image.url=https://kanboard.org
LABEL org.opencontainers.image.documentation=https://docs.kanboard.org

VOLUME /var/www/app/data
VOLUME /var/www/app/plugins
VOLUME /etc/nginx/ssl

EXPOSE 80 443

ARG VERSION

RUN apk --no-cache --update add \
tzdata openssl unzip nginx bash ca-certificates s6 curl ssmtp mailx php83 php83-phar php83-curl \
php83-fpm php83-json php83-zlib php83-xml php83-dom php83-ctype php83-opcache php83-zip php83-iconv \
php83-pdo php83-pdo_mysql php83-pdo_sqlite php83-pdo_pgsql php83-mbstring php83-session php83-bcmath \
php83-gd php83-openssl php83-sockets php83-posix php83-ldap php83-simplexml php83-xmlwriter && \
rm -rf /var/www/localhost && \
rm -f /etc/php83/php-fpm.d/www.conf && \
ln -sf /usr/bin/php83 /usr/bin/php

ADD . /var/www/app
ADD docker/ /

RUN rm -rf /var/www/app/docker && echo $VERSION > /var/www/app/app/version.txt

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD []
60 changes: 60 additions & 0 deletions tests/packagedcode/test_dockerfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from packagedcode import dockerfile
import pytest
import os.path
import json
from pathlib import Path
from packagedcode.dockerfile import DockerfileHandler

class TestDockerfileHandler:
    """
    Tests for DockerfileHandler: datafile detection, label extraction and
    package data parsing, using the fixtures under ``data/docker``.
    """

    # (Dockerfile fixture, expected JSON fixture) pairs. The expected file
    # names do not follow one naming rule, so they are listed explicitly.
    DOCKER_TEST_FILES = (
        ('test.dockerfile', 'test-dockerfile-expected.json'),
        ('test.containerfile', 'containerfile-expected.json'),
        ('psql.dockerfile', 'psql-expected.json'),
    )

    def get_test_loc(self, path):
        """
        Return the absolute Path of ``path``, resolved relative to the
        directory that contains this test module.
        """
        return Path(os.path.dirname(__file__)) / path

    def load_expected(self, expected_file):
        """
        Return the parsed JSON content of ``expected_file``.
        """
        with open(expected_file) as f:
            return json.load(f)

    def test_is_datafile(self):
        for dockerfile, _expected in self.DOCKER_TEST_FILES:
            test_file = self.get_test_loc(f'data/docker/{dockerfile}')
            assert DockerfileHandler.is_datafile(str(test_file))

    def test_parse_dockerfile(self):
        for dockerfile, expected in self.DOCKER_TEST_FILES:
            test_file = self.get_test_loc(f'data/docker/{dockerfile}')
            expected_loc = self.get_test_loc(f'data/docker/{expected}')
            packages = list(DockerfileHandler.parse(str(test_file)))
            expected_packages = self.load_expected(expected_loc)
            # NOTE(review): this compares the objects yielded by parse()
            # directly with JSON-loaded dicts; confirm whether they need to
            # be converted to mappings first.
            assert packages == expected_packages

    def test_extract_oci_labels_from_dockerfile(self):
        for dockerfile, expected in self.DOCKER_TEST_FILES:
            dockerfile_path = self.get_test_loc(f'data/docker/{dockerfile}')
            labels = DockerfileHandler.extract_oci_labels_from_dockerfile(
                str(dockerfile_path)
            )
            expected_loc = self.get_test_loc(f'data/docker/{expected}')
            expected_labels = self.load_expected(expected_loc)[0]['labels']
            assert labels == expected_labels

0 comments on commit 610689c

Please sign in to comment.