# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
from pathlib import Path
from typing import Iterable, Optional
from compiler_gym.datasets import Dataset, TarDatasetWithManifest
from compiler_gym.envs.llvm.datasets.anghabench import AnghaBenchDataset
from compiler_gym.envs.llvm.datasets.cbench import (
CBenchDataset,
CBenchLegacyDataset,
CBenchLegacyDataset2,
)
from compiler_gym.envs.llvm.datasets.chstone import CHStoneDataset
from compiler_gym.envs.llvm.datasets.clgen import CLgenDataset
from compiler_gym.envs.llvm.datasets.csmith import CsmithBenchmark, CsmithDataset
from compiler_gym.envs.llvm.datasets.jotaibench import JotaiBenchDataset
from compiler_gym.envs.llvm.datasets.llvm_stress import LlvmStressDataset
from compiler_gym.envs.llvm.datasets.poj104 import POJ104Dataset, POJ104LegacyDataset
from compiler_gym.util.runfiles_path import site_data_path
class BlasDataset(TarDatasetWithManifest):
    """The ``benchmark://blas-v0`` dataset ("Basic linear algebra kernels").

    The constructor supplies a fixed set of metadata (download URLs,
    checksums, license, references) to :class:`TarDatasetWithManifest`; only
    the site data location and the sort order are caller-controlled.
    """

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Configure the dataset.

        :param site_data_base: Passed through to the base class as
            ``site_data_base``.
        :param sort_order: Passed through to the base class as
            ``sort_order``.
        """
        # Archive of benchmark files and its expected SHA-256 checksum.
        archive_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-blas-v0.tar.bz2"
        archive_checksum = (
            "e724a8114709f8480adeb9873d48e426e8d9444b00cddce48e342b9f0f2b096d"
        )
        # Manifest file and its expected SHA-256 checksum.
        manifest_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-blas-v0-manifest.bz2"
        manifest_checksum = (
            "6946437dcb0da5fad3ed8a7fd83eb4294964198391d5537b1310e22d7ceebff4"
        )
        related_links = {
            "Paper": "https://strum355.netsoc.co/books/PDF/Basic%20Linear%20Algebra%20Subprograms%20for%20Fortran%20Usage%20-%20BLAS%20(1979).pdf",
            "Homepage": "http://www.netlib.org/blas/",
        }
        super().__init__(
            name="benchmark://blas-v0",
            description="Basic linear algebra kernels",
            license="BSD 3-Clause",
            references=related_links,
            tar_urls=[archive_url],
            tar_sha256=archive_checksum,
            manifest_urls=[manifest_url],
            manifest_sha256=manifest_checksum,
            strip_prefix="blas-v0",
            benchmark_file_suffix=".bc",
            site_data_base=site_data_base,
            sort_order=sort_order,
        )
class GitHubDataset(TarDatasetWithManifest):
    """The ``benchmark://github-v0`` dataset
    ("Compile-only C/C++ objects from GitHub").

    A single archive is shared by all platforms, while the manifest is
    selected according to ``sys.platform``.
    """

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Configure the dataset.

        :param site_data_base: Passed through to the base class as
            ``site_data_base``.
        :param sort_order: Passed through to the base class as
            ``sort_order``.
        :raises KeyError: If ``sys.platform`` is neither ``"darwin"`` nor
            ``"linux"``.
        """
        # (manifest URL, manifest SHA-256) for each platform with a manifest.
        manifests_by_platform = {
            "darwin": (
                "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-github-v0-macos-manifest.bz2",
                "10d933a7d608248be286d756b27813794789f7b87d8561c241d0897fb3238503",
            ),
            "linux": (
                "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-github-v0-linux-manifest.bz2",
                "aede9ca78657b4694ada9a4592d93f0bbeb3b3bd0fff3b537209850228480d3b",
            ),
        }
        # Plain indexing on purpose: an unlisted platform fails here.
        platform_url, platform_checksum = manifests_by_platform[sys.platform]

        archive_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-github-v0.tar.bz2"
        super().__init__(
            name="benchmark://github-v0",
            description="Compile-only C/C++ objects from GitHub",
            license="CC BY 4.0",
            references={
                "Paper": "https://arxiv.org/pdf/2012.01470.pdf",
            },
            tar_urls=[archive_url],
            tar_sha256="880269dd7a5c2508ea222a2e54c318c38c8090eb105c0a87c595e9dd31720764",
            manifest_urls=[platform_url],
            manifest_sha256=platform_checksum,
            strip_prefix="github-v0",
            benchmark_file_suffix=".bc",
            site_data_base=site_data_base,
            sort_order=sort_order,
        )
class LinuxDataset(TarDatasetWithManifest):
    """The ``benchmark://linux-v0`` dataset
    ("Compile-only object files from C Linux kernel").

    A single archive is shared by all platforms, while the manifest is
    selected according to ``sys.platform``.
    """

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Configure the dataset.

        :param site_data_base: Passed through to the base class as
            ``site_data_base``.
        :param sort_order: Passed through to the base class as
            ``sort_order``.
        :raises KeyError: If ``sys.platform`` is neither ``"darwin"`` nor
            ``"linux"``.
        """
        # (manifest URL, manifest SHA-256) for each platform with a manifest.
        manifests_by_platform = {
            "darwin": (
                "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-linux-v0-macos-manifest.bz2",
                "dfc87b94c7a43e899e76507398a5af22178aebaebcb5d7e24e82088aeecb0690",
            ),
            "linux": (
                "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-linux-v0-linux-manifest.bz2",
                "32ceb8576f683798010816ac605ee496f386ddbbe64be9e0796015d247a73f92",
            ),
        }
        # Plain indexing on purpose: an unlisted platform fails here.
        platform_url, platform_checksum = manifests_by_platform[sys.platform]

        archive_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-linux-v0.tar.bz2"
        super().__init__(
            name="benchmark://linux-v0",
            description="Compile-only object files from C Linux kernel",
            license="GPL-2.0",
            references={"Homepage": "https://www.linux.org/"},
            tar_urls=[archive_url],
            tar_sha256="a1ae5c376af30ab042c9e54dc432f89ce75f9ebaee953bc19c08aff070f12566",
            manifest_urls=[platform_url],
            manifest_sha256=platform_checksum,
            strip_prefix="linux-v0",
            benchmark_file_suffix=".bc",
            site_data_base=site_data_base,
            sort_order=sort_order,
        )
class MibenchDataset(TarDatasetWithManifest):
    """The ``benchmark://mibench-v1`` dataset ("C benchmarks")."""

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Configure the dataset.

        :param site_data_base: Passed through to the base class as
            ``site_data_base``.
        :param sort_order: Passed through to the base class as
            ``sort_order``.
        """
        archive_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-mibench-v1.tar.bz2"
        archive_checksum = (
            "795b80d3198bc96e394823a4cb294d256845beffccce52fea0e3446395212bb5"
        )
        # NOTE(review): the manifest URL and checksum are the mibench-v0 ones
        # (the same values MibenchV0Dataset uses) although the archive is v1.
        # Presumably the benchmark listing is shared between the two
        # versions -- confirm before changing.
        manifest_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-mibench-v0-manifest.bz2"
        manifest_checksum = (
            "8ed985d685b48f444a3312cd84ccc5debda4a839850e442a3cdc93910ba0dc5f"
        )
        super().__init__(
            name="benchmark://mibench-v1",
            description="C benchmarks",
            license="BSD 3-Clause",
            references={
                "Paper": "http://vhosts.eecs.umich.edu/mibench/Publications/MiBench.pdf"
            },
            tar_urls=[archive_url],
            tar_sha256=archive_checksum,
            manifest_urls=[manifest_url],
            manifest_sha256=manifest_checksum,
            strip_prefix="mibench-v1",
            benchmark_file_suffix=".bc",
            site_data_base=site_data_base,
            sort_order=sort_order,
        )
class MibenchV0Dataset(TarDatasetWithManifest):
    """The deprecated ``benchmark://mibench-v0`` dataset ("C benchmarks").

    Registered with the deprecation message "Please use mibench-v1".
    """

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Configure the dataset.

        :param site_data_base: Passed through to the base class as
            ``site_data_base``.
        :param sort_order: Passed through to the base class as
            ``sort_order``.
        """
        archive_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-mibench-v0.tar.bz2"
        archive_checksum = (
            "128c090c40b955b99fdf766da167a5f642018fb35c16a1d082f63be2e977eb13"
        )
        manifest_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-mibench-v0-manifest.bz2"
        manifest_checksum = (
            "8ed985d685b48f444a3312cd84ccc5debda4a839850e442a3cdc93910ba0dc5f"
        )
        super().__init__(
            name="benchmark://mibench-v0",
            description="C benchmarks",
            license="BSD 3-Clause",
            references={
                "Paper": "http://vhosts.eecs.umich.edu/mibench/Publications/MiBench.pdf"
            },
            tar_urls=[archive_url],
            tar_sha256=archive_checksum,
            manifest_urls=[manifest_url],
            manifest_sha256=manifest_checksum,
            strip_prefix="mibench-v0",
            benchmark_file_suffix=".bc",
            site_data_base=site_data_base,
            sort_order=sort_order,
            deprecated="Please use mibench-v1",
        )
class NPBDataset(TarDatasetWithManifest):
    """The ``benchmark://npb-v0`` dataset ("NASA Parallel Benchmarks")."""

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Configure the dataset.

        :param site_data_base: Passed through to the base class as
            ``site_data_base``.
        :param sort_order: Passed through to the base class as
            ``sort_order``.
        """
        archive_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-npb-v0.tar.bz2"
        archive_checksum = (
            "793ac2e7a4f4ed83709e8a270371e65b724da09eaa0095c52e7f4209f63bb1f2"
        )
        manifest_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-npb-v0-manifest.bz2"
        manifest_checksum = (
            "89eccb7f1b0b9e1f82b9b900b9f686ff5b189a2a67a4f8969a15901cd315dba2"
        )
        super().__init__(
            name="benchmark://npb-v0",
            description="NASA Parallel Benchmarks",
            license="NASA Open Source Agreement v1.3",
            references={
                "Paper": "http://optout.csc.ncsu.edu/~mueller/codeopt/codeopt05/projects/www4.ncsu.edu/~pgauria/csc791a/papers/NAS-95-020.pdf"
            },
            tar_urls=[archive_url],
            tar_sha256=archive_checksum,
            manifest_urls=[manifest_url],
            manifest_sha256=manifest_checksum,
            strip_prefix="npb-v0",
            benchmark_file_suffix=".bc",
            site_data_base=site_data_base,
            sort_order=sort_order,
        )
class OpenCVDataset(TarDatasetWithManifest):
    """The ``benchmark://opencv-v0`` dataset
    ("Compile-only object files from C++ OpenCV library").
    """

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Configure the dataset.

        :param site_data_base: Passed through to the base class as
            ``site_data_base``.
        :param sort_order: Passed through to the base class as
            ``sort_order``.
        """
        archive_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-opencv-v0.tar.bz2"
        archive_checksum = (
            "003df853bd58df93572862ca2f934c7b129db2a3573bcae69a2e59431037205c"
        )
        manifest_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-opencv-v0-manifest.bz2"
        manifest_checksum = (
            "8de96f722fab18f3a2a74db74b4038c7947fe8b3da867c9260206fdf5338cd81"
        )
        related_links = {
            "Paper": "https://mipro-proceedings.com/sites/mipro-proceedings.com/files/upload/sp/sp_008.pdf",
            "Homepage": "https://opencv.org/",
        }
        super().__init__(
            name="benchmark://opencv-v0",
            description="Compile-only object files from C++ OpenCV library",
            license="Apache 2.0",
            references=related_links,
            tar_urls=[archive_url],
            tar_sha256=archive_checksum,
            manifest_urls=[manifest_url],
            manifest_sha256=manifest_checksum,
            strip_prefix="opencv-v0",
            benchmark_file_suffix=".bc",
            site_data_base=site_data_base,
            sort_order=sort_order,
        )
class TensorFlowDataset(TarDatasetWithManifest):
    """The ``benchmark://tensorflow-v0`` dataset
    ("Compile-only object files from C++ TensorFlow library").
    """

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Configure the dataset.

        :param site_data_base: Passed through to the base class as
            ``site_data_base``.
        :param sort_order: Passed through to the base class as
            ``sort_order``.
        """
        archive_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-tensorflow-v0.tar.bz2"
        archive_checksum = (
            "f77dd1988c772e8359e1303cc9aba0d73d5eb27e0c98415ac3348076ab94efd1"
        )
        manifest_url = "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-tensorflow-v0-manifest.bz2"
        manifest_checksum = (
            "cffc45cd10250d483cb093dec913c8a7da64026686284cccf404623bd1da6da8"
        )
        related_links = {
            "Paper": "https://www.usenix.org/system/files/conference/osdi16/osdi16-abadi.pdf",
            "Homepage": "https://www.tensorflow.org/",
        }
        super().__init__(
            name="benchmark://tensorflow-v0",
            description="Compile-only object files from C++ TensorFlow library",
            license="Apache 2.0",
            references=related_links,
            tar_urls=[archive_url],
            tar_sha256=archive_checksum,
            manifest_urls=[manifest_url],
            manifest_sha256=manifest_checksum,
            strip_prefix="tensorflow-v0",
            benchmark_file_suffix=".bc",
            site_data_base=site_data_base,
            sort_order=sort_order,
        )
def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset]:
    """Lazily construct each of the builtin LLVM datasets, in a fixed order.

    This is a generator: a dataset object is only constructed when the
    iteration reaches it. Current and deprecated (legacy) variants are both
    produced.

    :param site_data_base: The root of the site data path. When omitted (or
        otherwise falsy), ``site_data_path("llvm-v0")`` is used.

    :return: An iterable sequence of :class:`Dataset
        <compiler_gym.datasets.Dataset>` instances.

    :raises KeyError: During iteration, once the legacy anghabench-v0 entry
        is reached, if ``sys.platform`` is neither ``"darwin"`` nor
        ``"linux"``.
    """
    if not site_data_base:
        site_data_base = site_data_path("llvm-v0")

    yield AnghaBenchDataset(site_data_base=site_data_base, sort_order=0)

    # Legacy version of Anghabench, built from an old per-platform manifest.
    legacy_anghabench_manifests = {
        "darwin": (
            "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2",
            "39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1",
        ),
        "linux": (
            "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
            "a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
        ),
    }
    legacy_url, legacy_checksum = legacy_anghabench_manifests[sys.platform]
    yield AnghaBenchDataset(
        name="benchmark://anghabench-v0",
        site_data_base=site_data_base,
        sort_order=0,
        manifest_url=legacy_url,
        manifest_sha256=legacy_checksum,
        deprecated="Please use anghabench-v1",
    )

    yield JotaiBenchDataset(site_data_base=site_data_base)

    for dataset_class in (BlasDataset, CLgenDataset):
        yield dataset_class(site_data_base=site_data_base, sort_order=0)

    yield CBenchDataset(site_data_base=site_data_base)

    # Legacy version of cbench-v1 in which the 'b' was capitalized. This is
    # deprecated and will be removed no earlier than v0.1.10.
    capitalized_name_notice = (
        "Please use 'benchmark://cbench-v1' (note the lowercase name). "
        "The dataset is the same, only the name has changed"
    )
    yield CBenchLegacyDataset2(
        site_data_base=site_data_base,
        name="benchmark://cBench-v1",
        deprecated=capitalized_name_notice,
        manifest_url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cBench-v1-manifest.bz2",
        manifest_sha256="635b94eeb2784dfedb3b53fd8f84517c3b4b95d851ddb662d4c1058c72dc81e0",
        sort_order=100,
    )

    # These two are constructed without an explicit sort order.
    for dataset_class in (CBenchLegacyDataset, CHStoneDataset):
        yield dataset_class(site_data_base=site_data_base)

    # Remaining datasets as (class, sort order) pairs, in yield order.
    remaining = (
        (CsmithDataset, 0),
        (GitHubDataset, 0),
        (LinuxDataset, 0),
        (LlvmStressDataset, 0),
        (MibenchDataset, 0),
        (MibenchV0Dataset, 100),
        (NPBDataset, 0),
        (OpenCVDataset, 0),
        (POJ104Dataset, 0),
        (POJ104LegacyDataset, 100),
        (TensorFlowDataset, 0),
    )
    for dataset_class, order in remaining:
        yield dataset_class(site_data_base=site_data_base, sort_order=order)
# Public names exported by ``from <this module> import *``.
#
# CHStoneDataset is listed because it is imported into this module and is
# yielded, un-deprecated, by get_llvm_datasets() like the other exported
# dataset classes. CBenchLegacyDataset2 and MibenchV0Dataset are only
# registered as deprecated datasets and are left unexported.
__all__ = [
    "AnghaBenchDataset",
    "BlasDataset",
    "CBenchDataset",
    "CBenchLegacyDataset",
    "CHStoneDataset",
    "CLgenDataset",
    "CsmithBenchmark",
    "CsmithDataset",
    "get_llvm_datasets",
    "GitHubDataset",
    "JotaiBenchDataset",
    "LinuxDataset",
    "LlvmStressDataset",
    "MibenchDataset",
    "NPBDataset",
    "OpenCVDataset",
    "POJ104Dataset",
    "POJ104LegacyDataset",
    "TensorFlowDataset",
]