Source code for compiler_gym.envs.compiler_env

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""This module defines the OpenAI gym interface for compilers."""
from abc import ABC, abstractmethod
from typing import Iterable, List, Optional, Tuple, Union

import gym
from gym.spaces import Space

from compiler_gym.compiler_env_state import CompilerEnvState
from compiler_gym.datasets import Benchmark, BenchmarkUri, Dataset
from compiler_gym.spaces import Reward
from compiler_gym.util.gym_type_hints import (
    ActionType,
    ObservationType,
    OptionalArgumentValue,
    StepType,
)
from compiler_gym.validation_result import ValidationResult
from compiler_gym.views import ObservationSpaceSpec, ObservationView, RewardView


[docs]class CompilerEnv(gym.Env, ABC): """An OpenAI gym environment for compiler optimizations. The easiest way to create a CompilerGym environment is to call :code:`gym.make()` on one of the registered environments: >>> env = gym.make("llvm-v0") See :code:`compiler_gym.COMPILER_GYM_ENVS` for a list of registered environment names. Alternatively, an environment can be constructed directly, such as by connecting to a running compiler service at :code:`localhost:8080` (see :doc:`this document </compiler_gym/service>` for more details): >>> env = ClientServiceCompilerEnv( ... service="localhost:8080", ... observation_space="features", ... reward_space="runtime", ... rewards=[env_reward_spaces], ... ) Once constructed, an environment can be used in exactly the same way as a regular :code:`gym.Env`, e.g. >>> observation = env.reset() >>> cumulative_reward = 0 >>> for i in range(100): >>> action = env.action_space.sample() >>> observation, reward, done, info = env.step(action) >>> cumulative_reward += reward >>> if done: >>> break >>> print(f"Reward after {i} steps: {cumulative_reward}") Reward after 100 steps: -0.32123 """
[docs] @abstractmethod def __init__(self): """Construct an environment. Do not construct an environment directly. Use :code:`gym.make()` on one of the registered environments: >>> with gym.make("llvm-v0") as env: ... pass # Use environment """ raise NotImplementedError("abstract class")
[docs] @abstractmethod def close(self): """Close the environment. Once closed, :func:`reset` must be called before the environment is used again. .. note:: You must make sure to call :code:`env.close()` on a CompilerGym environment when you are done with it. This is needed to perform manual tidying up of temporary files and processes. See :ref:`the FAQ <faq:Do I need to call env.close()?>` for more details. """ raise NotImplementedError("abstract method")
@property @abstractmethod def observation_space_spec(self) -> ObservationSpaceSpec: raise NotImplementedError("abstract method") @observation_space_spec.setter @abstractmethod def observation_space_spec( self, observation_space_spec: Optional[ObservationSpaceSpec] ): raise NotImplementedError("abstract method") @property @abstractmethod def reward_space_spec(self) -> Optional[Reward]: raise NotImplementedError("abstract method") @reward_space_spec.setter @abstractmethod def reward_space_spec(self, val: Optional[Reward]): raise NotImplementedError("abstract method") @property @abstractmethod def benchmark(self) -> Benchmark: """Get or set the benchmark to use. :getter: Get :class:`Benchmark <compiler_gym.datasets.Benchmark>` that is currently in use. :setter: Set the benchmark to use. Either a :class:`Benchmark <compiler_gym.datasets.Benchmark>` instance, or the URI of a benchmark as in :meth:`env.datasets.benchmark_uris() <compiler_gym.datasets.Datasets.benchmark_uris>`. .. note:: Setting a new benchmark has no effect until :func:`env.reset() <compiler_gym.envs.CompilerEnv.reset>` is called. """ raise NotImplementedError("abstract method") @benchmark.setter @abstractmethod def benchmark(self, benchmark: Optional[Union[str, Benchmark, BenchmarkUri]]): raise NotImplementedError("abstract method") @property @abstractmethod def datasets(self) -> Iterable[Dataset]: raise NotImplementedError("abstract method") @datasets.setter @abstractmethod def datasets(self, datasets: Iterable[Dataset]): raise NotImplementedError("abstract method") @property @abstractmethod def episode_walltime(self) -> float: """Return the amount of time in seconds since the last call to :meth:`reset() <compiler_gym.envs.CompilerEnv.reset>`. """ raise NotImplementedError("abstract method") @property @abstractmethod def in_episode(self) -> bool: """Whether the service is ready for :func:`step` to be called, i.e. :func:`reset` has been called and :func:`close` has not. :return: :code:`True` if in an episode, else :code:`False`. """ raise NotImplementedError("abstract method") @property @abstractmethod def episode_reward(self) -> Optional[float]: """If :func:`CompilerEnv.reward_space <compiler_gym.envs.CompilerGym.reward_space>` is set, this value is the sum of all rewards for the current episode. """ raise NotImplementedError("abstract method") @episode_reward.setter @abstractmethod def episode_reward(self, episode_reward: Optional[float]): raise NotImplementedError("abstract method") @property @abstractmethod def actions(self) -> List[ActionType]: raise NotImplementedError("abstract method") @property @abstractmethod def version(self) -> str: """The version string of the compiler service.""" raise NotImplementedError("abstract method") @property @abstractmethod def compiler_version(self) -> str: """The version string of the underlying compiler that this service supports.""" raise NotImplementedError("abstract method") @property @abstractmethod def state(self) -> CompilerEnvState: """The tuple representation of the current environment state.""" raise NotImplementedError("abstract method") @property @abstractmethod def action_space(self) -> Space: """The current action space. :getter: Get the current action space. :setter: Set the action space to use. Must be an entry in :code:`action_spaces`. If :code:`None`, the default action space is selected. """ raise NotImplementedError("abstract method") @action_space.setter @abstractmethod def action_space(self, action_space: Optional[str]): raise NotImplementedError("abstract method") @property @abstractmethod def action_spaces(self) -> List[str]: """A list of supported action space names.""" raise NotImplementedError("abstract method") @action_spaces.setter @abstractmethod def action_spaces(self, action_spaces: List[str]): raise NotImplementedError("abstract method") @property @abstractmethod def reward_space(self) -> Optional[Reward]: """The default reward space that is used to return a reward value from :func:`~step()`. :getter: Returns a :class:`Reward <compiler_gym.spaces.Reward>`, or :code:`None` if not set. :setter: Set the default reward space. """ raise NotImplementedError("abstract method") @reward_space.setter @abstractmethod def reward_space(self, reward_space: Optional[Union[str, Reward]]) -> None: raise NotImplementedError("abstract method") @property @abstractmethod def observation_space(self) -> Optional[Space]: """The observation space that is used to return an observation value in :func:`~step()`. :getter: Returns the underlying observation space, or :code:`None` if not set. :setter: Set the default observation space. """ raise NotImplementedError("abstract method") @observation_space.setter @abstractmethod def observation_space( self, observation_space: Optional[Union[str, ObservationSpaceSpec]] ) -> None: raise NotImplementedError("abstract method") @property @abstractmethod def observation(self) -> ObservationView: """A view of the available observation spaces that permits on-demand computation of observations. """ raise NotImplementedError("abstract method") @observation.setter @abstractmethod def observation(self, observation: ObservationView) -> None: raise NotImplementedError("abstract method") @property @abstractmethod def reward_range(self) -> Tuple[float, float]: """A tuple indicating the range of reward values. Default range is (-inf, +inf). """ raise NotImplementedError("abstract method") @property @abstractmethod def reward(self) -> RewardView: """A view of the available reward spaces that permits on-demand computation of rewards. """ raise NotImplementedError("abstract method") @reward.setter @abstractmethod def reward(self, reward: RewardView) -> None: raise NotImplementedError("abstract method")
[docs] @abstractmethod def fork(self) -> "CompilerEnv": """Fork a new environment with exactly the same state. This creates a duplicate environment instance with the current state. The new environment is entirely independently of the source environment. The user must call :meth:`close() <compiler_gym.envs.CompilerEnv.close>` on the original and new environments. If not already in an episode, :meth:`reset() <compiler_gym.envs.CompilerEnv.reset>` is called. Example usage: >>> env = gym.make("llvm-v0") >>> env.reset() # ... use env >>> new_env = env.fork() >>> new_env.state == env.state True >>> new_env.step(1) == env.step(1) True .. note:: The client/service implementation of CompilerGym means that the forked and base environments share a common backend resource. This means that if either of them crash, such as due to a compiler assertion, both environments must be reset. :return: A new environment instance. """ raise NotImplementedError("abstract method")
[docs] @abstractmethod def reset( # pylint: disable=arguments-differ self, benchmark: Optional[Union[str, Benchmark]] = None, action_space: Optional[str] = None, observation_space: Union[ OptionalArgumentValue, str, ObservationSpaceSpec ] = OptionalArgumentValue.UNCHANGED, reward_space: Union[ OptionalArgumentValue, str, Reward ] = OptionalArgumentValue.UNCHANGED, timeout: float = 300, ) -> Optional[ObservationType]: """Reset the environment state. This method must be called before :func:`step()`. :param benchmark: The name of the benchmark to use. If provided, it overrides any value that was set during :func:`__init__`, and becomes subsequent calls to :code:`reset()` will use this benchmark. If no benchmark is provided, and no benchmark was provided to :func:`__init___`, the service will randomly select a benchmark to use. :param action_space: The name of the action space to use. If provided, it overrides any value that set during :func:`__init__`, and subsequent calls to :code:`reset()` will use this action space. If no action space is provided, the default action space is used. :param observation_space: Compute and return observations at each :func:`step()` from this space. Accepts a string name or an :class:`ObservationSpaceSpec <compiler_gym.views.ObservationSpaceSpec>`. If :code:`None`, :func:`step()` returns :code:`None` for the observation value. If :code:`OptionalArgumentValue.UNCHANGED` (the default value), the observation space remains unchanged from the previous episode. For available spaces, see :class:`env.observation.spaces <compiler_gym.views.ObservationView>`. :param reward_space: Compute and return reward at each :func:`step()` from this space. Accepts a string name or a :class:`Reward <compiler_gym.spaces.Reward>`. If :code:`None`, :func:`step()` returns :code:`None` for the reward value. If :code:`OptionalArgumentValue.UNCHANGED` (the default value), the observation space remains unchanged from the previous episode. For available spaces, see :class:`env.reward.spaces <compiler_gym.views.RewardView>`. :param timeout: The maximum number of seconds to wait for reset to succeed. :return: The initial observation. :raises BenchmarkInitError: If the benchmark is invalid. This can happen if the benchmark contains code that the compiler does not support, or because of some internal error within the compiler. In this case, another benchmark must be used. :raises TypeError: If no benchmark has been set, and the environment does not have a default benchmark to select from. """ raise NotImplementedError("abstract method")
[docs] @abstractmethod def step( self, action: ActionType, observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, rewards: Optional[Iterable[Union[str, Reward]]] = None, timeout: float = 300, ) -> StepType: """Take a step. :param action: An action. :param observation_spaces: A list of observation spaces to compute observations from. If provided, this changes the :code:`observation` element of the return tuple to be a list of observations from the requested spaces. The default :code:`env.observation_space` is not returned. :param reward_spaces: A list of reward spaces to compute rewards from. If provided, this changes the :code:`reward` element of the return tuple to be a list of rewards from the requested spaces. The default :code:`env.reward_space` is not returned. :param timeout: The maximum number of seconds to wait for the step to succeed. Accepts a float value. The default is 300 seconds. :return: A tuple of observation, reward, done, and info. Observation and reward are None if default observation/reward is not set. """ raise NotImplementedError("abstract method")
[docs] @abstractmethod def multistep( self, actions: Iterable[ActionType], observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, rewards: Optional[Iterable[Union[str, Reward]]] = None, timeout: float = 300, ): """Take a sequence of steps and return the final observation and reward. :param action: A sequence of actions to apply in order. :param observation_spaces: A list of observation spaces to compute observations from. If provided, this changes the :code:`observation` element of the return tuple to be a list of observations from the requested spaces. The default :code:`env.observation_space` is not returned. :param reward_spaces: A list of reward spaces to compute rewards from. If provided, this changes the :code:`reward` element of the return tuple to be a list of rewards from the requested spaces. The default :code:`env.reward_space` is not returned. :param timeout: The maximum number of seconds to wait for the steps to succeed. Accepts a float value. The default is 300 seconds. :return: A tuple of observation, reward, done, and info. Observation and reward are None if default observation/reward is not set. """ raise NotImplementedError("abstract method")
[docs] @abstractmethod def render( self, mode="human", ) -> Optional[str]: """Render the environment. :param mode: The render mode to use. :raises TypeError: If a default observation space is not set, or if the requested render mode does not exist. """ raise NotImplementedError("abstract method")
[docs] @abstractmethod def commandline(self) -> str: """Interface for :class:`CompilerEnv <compiler_gym.envs.CompilerEnv>` subclasses to provide an equivalent commandline invocation to the current environment state. See also :meth:`commandline_to_actions() <compiler_gym.envs.CompilerEnv.commandline_to_actions>`. :return: A string commandline invocation. """ raise NotImplementedError("abstract method")
[docs] @abstractmethod def commandline_to_actions(self, commandline: str) -> List[ActionType]: """Interface for :class:`CompilerEnv <compiler_gym.envs.CompilerEnv>` subclasses to convert from a commandline invocation to a sequence of actions. See also :meth:`commandline() <compiler_gym.envs.CompilerEnv.commandline>`. :return: A list of actions. """ raise NotImplementedError("abstract method")
[docs] @abstractmethod def apply(self, state: CompilerEnvState) -> None: # noqa """Replay this state on the given environment. :param state: A :class:`CompilerEnvState <compiler_gym.CompilerEnvState>` instance. :raises ValueError: If this state cannot be applied. """ raise NotImplementedError("abstract method")
[docs] @abstractmethod def validate(self, state: Optional[CompilerEnvState] = None) -> ValidationResult: """Validate an environment's state. :param state: A state to environment. If not provided, the current state is validated. :returns: A :class:`ValidationResult <compiler_gym.ValidationResult>`. """ raise NotImplementedError("abstract method")