Source code for compiler_gym.envs.gcc.datasets.anghabench

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
from pathlib import Path
from typing import Optional

from compiler_gym.datasets import TarDatasetWithManifest

# TODO: This can be merged with the LLVM implementation.
class AnghaBenchDataset(TarDatasetWithManifest):
    """A dataset of C programs curated from GitHub source code.

    The dataset is from:

        da Silva, Anderson Faustino, Bruno Conde Kind, José Wesley de Souza
        Magalhaes, Jerônimo Nunes Rocha, Breno Campos Ferreira Guimaraes, and
        Fernando Magno Quinão Pereira. "ANGHABENCH: A Suite with One Million
        Compilable C Benchmarks for Code-Size Reduction." In 2021 IEEE/ACM
        International Symposium on Code Generation and Optimization (CGO),
        pp. 378-390. IEEE, 2021.

    The source archive location is configured via ``tar_urls`` in
    :meth:`__init__`.
    """

    def __init__(
        self,
        site_data_base: Path,
        sort_order: int = 0,
        manifest_url: Optional[str] = None,
        manifest_sha256: Optional[str] = None,
        deprecated: Optional[str] = None,
        name: Optional[str] = None,
    ):
        """Configure the dataset and forward the settings to the base class.

        :param site_data_base: Forwarded unchanged as ``site_data_base``.
        :param sort_order: Forwarded unchanged as ``sort_order``.
        :param manifest_url: Manifest URL to use. When falsy, the default
            for the current ``sys.platform`` is used instead.
        :param manifest_sha256: Manifest checksum to use. When falsy, the
            default for the current ``sys.platform`` is used instead.
        :param deprecated: Forwarded unchanged as ``deprecated``.
        :param name: Dataset name. When falsy, defaults to
            ``"benchmark://anghabench-v1"``.
        :raises KeyError: If a platform default is needed (either manifest
            argument is missing) and ``sys.platform`` is neither ``"darwin"``
            nor ``"linux"``.
        """
        # NOTE(review): the URL string literals in this class are empty in
        # this copy of the source -- confirm them against upstream.
        platform_manifests = {
            "darwin": (
                "",
                "96ead63da5f8efa07fd0370f0c6e452b59bed840828b8b19402102b1ce3ee109",
            ),
            "linux": (
                "",
                "14df85f650199498cf769715e9f0d7841d09f9fa62a95b8ecc242bdaf227f33a",
            ),
        }

        # Only consult the per-platform defaults when they are actually
        # needed, so that callers who supply both manifest arguments are not
        # rejected on platforms that have no built-in manifest.
        if not (manifest_url and manifest_sha256):
            try:
                default_url, default_sha256 = platform_manifests[sys.platform]
            except KeyError:
                raise KeyError(
                    "No default AnghaBench manifest for platform {!r}; pass "
                    "both manifest_url and manifest_sha256 explicitly".format(
                        sys.platform
                    )
                ) from None
            manifest_url = manifest_url or default_url
            manifest_sha256 = manifest_sha256 or default_sha256

        super().__init__(
            name=name or "benchmark://anghabench-v1",
            description="Compile-only C/C++ functions extracted from GitHub",
            references={
                "Paper": "",
                "Homepage": "",
            },
            license="Unknown. See:",
            site_data_base=site_data_base,
            manifest_urls=[manifest_url],
            manifest_sha256=manifest_sha256,
            tar_urls=[
                ""
            ],
            tar_sha256="85d068e4ce44f2581e3355ee7a8f3ccb92568e9f5bd338bc3a918566f3aff42f",
            strip_prefix="AnghaBench-d8034ac8562b8c978376008f4b33df01b8887b19",
            tar_compression="gz",
            benchmark_file_suffix=".c",
            sort_order=sort_order,
            deprecated=deprecated,
        )