@techreport{perlert-wg-00,
  number      = {draft-perlert-wg-00},
  type        = {Internet-Draft},
  institution = {Internet Engineering Task Force},
  publisher   = {Internet Engineering Task Force},
  note        = {Work in Progress},
  url         = {https://datatracker.ietf.org/doc/draft-perlert-wg/00/},
  author      = {Ruben Montero},
  title       = {{Protocol for Evaluating Reinforcement Learning Environments in Real Time}},
  pagetotal   = 10,
  year        = 2020,
  month       = aug,
  day         = 13,
  abstract    = {This document defines a simple UDP protocol for communication between a server simulating a reinforcement learning environment and a client observing it and responding with actions. Reinforcement learning problems are usually defined within the scope of a Markov Decision Process (MDP), where an agent sends an action belonging to an action space to an environment. The environment acts as a black box, returning an observation and a reward to the agent, whose goal is to maximize the total obtained reward. Although the problem statement is easy to understand, there are no conventions on how to connect a reinforcement learning simulation to a client agent, either on a local network or over the Internet. Addressing this question is especially useful for multiagent support and analysis. The PERLERT protocol defined in this document assumes that the server and client have shared certain information beforehand via another communication channel, such as a web page served over HTTP. For example, the client must know a port number and an instance number before participating in a simulation run on a server. Also, although it is often desirable to receive the full feedback from the environment, PERLERT focuses on real-time interaction where human agents can interact with AI agents, even if that means information can be lost due to network packet loss.},
}