# Source code for compiler_gym.envs.llvm.datasets.llvm_stress

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import subprocess
from pathlib import Path
from typing import Iterable

import numpy as np

from compiler_gym.datasets import Benchmark, Dataset
from compiler_gym.datasets.uri import BenchmarkUri
from compiler_gym.errors import BenchmarkInitError
from compiler_gym.third_party import llvm
from compiler_gym.util.commands import Popen

# The maximum value for the --seed argument to llvm-stress. Seeds are uint32
# values, so the valid range is 0 <= seed < 2^32.
UINT_MAX = (2**32) - 1


class LlvmStressDataset(Dataset):
    """A dataset which uses llvm-stress to generate programs.

    `llvm-stress <https://llvm.org/docs/CommandGuide/llvm-stress.html>`_ is a
    tool for generating random LLVM-IR files. This dataset forces reproducible
    results by setting the input seed to the generator. The benchmark's URI is
    the seed, e.g. "generator://llvm-stress-v0/10" is the benchmark generated
    by llvm-stress using seed 10. The total number of unique seeds is 2^32 - 1.

    Note that llvm-stress is a tool that is used to find errors in LLVM. As
    such, there is a higher likelihood that the benchmark cannot be used for an
    environment and that :meth:`env.reset()
    <compiler_gym.envs.CompilerEnv.reset>` will raise
    :class:`BenchmarkInitError <compiler_gym.datasets.BenchmarkInitError>`.
    """

    def __init__(self, site_data_base: Path, sort_order: int = 0):
        """Construct the dataset.

        :param site_data_base: The base path of a directory that will be used
            to store installed files.

        :param sort_order: An optional numeric value that should be used to
            order this dataset relative to others.
        """
        super().__init__(
            name="generator://llvm-stress-v0",
            description="Randomly generated LLVM-IR",
            references={
                "Documentation": "https://llvm.org/docs/CommandGuide/llvm-stress.html"
            },
            license="Apache License v2.0 with LLVM Exceptions",
            site_data_base=site_data_base,
            sort_order=sort_order,
        )

    @property
    def size(self) -> int:
        # Actually 2^32 - 1, but practically infinite for all intents and
        # purposes.
        return 0

    def benchmark_uris(self) -> Iterable[str]:
        # Lazily generate one URI per possible seed value.
        return (f"{self.name}/{i}" for i in range(UINT_MAX))

    def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
        # The URI path is "/<seed>"; strip the leading slash to get the seed.
        seed = int(uri.path[1:])
        return self.benchmark_from_seed(seed)

    def _random_benchmark(self, random_state: np.random.Generator) -> Benchmark:
        seed = random_state.integers(UINT_MAX)
        return self.benchmark_from_seed(seed)

    def benchmark_from_seed(self, seed: int) -> Benchmark:
        """Get a benchmark from a uint32 seed.

        :param seed: A number in the range 0 <= n < 2^32.

        :return: A benchmark instance.

        :raises BenchmarkInitError: If llvm-stress or llvm-as fails, or if
            benchmark generation exceeds the 60-second timeout.
        """
        self.install()

        # Run llvm-stress with the given seed and pipe the output to llvm-as to
        # assemble a bitcode.
        try:
            with Popen(
                [str(llvm.llvm_stress_path()), f"--seed={seed}"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            ) as llvm_stress:
                with Popen(
                    [str(llvm.llvm_as_path()), "-"],
                    stdin=llvm_stress.stdout,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                ) as llvm_as:
                    stdout, _ = llvm_as.communicate(timeout=60)
                    llvm_stress.communicate(timeout=60)
                    if llvm_stress.returncode or llvm_as.returncode:
                        raise BenchmarkInitError("Failed to generate benchmark")
        except subprocess.TimeoutExpired as e:
            # Chain the original exception so the timeout traceback is not
            # lost when callers debug benchmark-generation failures.
            raise BenchmarkInitError("Benchmark generation timed out") from e

        return Benchmark.from_file_contents(f"{self.name}/{seed}", stdout)