Publications
2025
- Diffusive limit approximation of pure jump optimal ergodic control problems. Marc Abeille, Bruno Bouchard, and Lorenzo Croissant. Stochastic Processes and their Applications, Mar 2025.
@article{ABC25,
  title = {Diffusive limit approximation of pure jump optimal ergodic control problems},
  volume = {181},
  copyright = {All rights reserved},
  issn = {03044149},
  doi = {10.1016/j.spa.2024.104536},
  language = {en},
  urldate = {2024-12-05},
  journal = {Stochastic Processes and their Applications},
  author = {Abeille, Marc and Bouchard, Bruno and Croissant, Lorenzo},
  month = mar,
  year = {2025},
  pages = {104536},
}
- Pareto-Optimality, Smoothness, and Stochasticity in Learning-Augmented One-Max-Search. Ziyad Benomar, Lorenzo Croissant, Vianney Perchet, and Spyros Angelopoulos. arXiv preprint, Feb 2025.
One-max search is a classic problem in online decision-making, in which a trader acts on a sequence of revealed prices and accepts one of them irrevocably to maximise its profit. The problem has been studied both in probabilistic and in worst-case settings, notably through competitive analysis, and more recently in learning-augmented settings in which the trader has access to a prediction on the sequence. However, existing approaches either lack smoothness, or do not achieve optimal worst-case guarantees: they do not attain the best possible trade-off between the consistency and the robustness of the algorithm. We close this gap by presenting the first algorithm that simultaneously achieves both of these important objectives. Furthermore, we show how to leverage the obtained smoothness to provide an analysis of one-max search in stochastic learning-augmented settings which capture randomness in both the observed prices and the prediction.
@misc{Benomar25,
  title = {Pareto-{Optimality}, {Smoothness}, and {Stochasticity} in {Learning}-{Augmented} {One}-{Max}-{Search}},
  copyright = {All rights reserved},
  urldate = {2025-02-11},
  publisher = {arXiv},
  author = {Benomar, Ziyad and Croissant, Lorenzo and Perchet, Vianney and Angelopoulos, Spyros},
  month = feb,
  year = {2025},
  keywords = {Computer Science - Artificial Intelligence, Computer Science - Data Structures and Algorithms},
}
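The setting in this abstract has a simple threshold structure that is easy to illustrate. The sketch below is a minimal toy, not the algorithm from the paper: it runs a generic reservation-price rule (accept the first price at or above a threshold, take the last price otherwise) together with a naive prediction-weighted threshold; the function names, the blending rule, and the parameter lam are assumptions made purely for illustration.

```python
import math
from typing import Iterable

def one_max_search(prices: Iterable[float], threshold: float) -> float:
    """Accept the first price >= threshold; if none qualifies, take the last price."""
    last = 0.0
    for p in prices:
        if p >= threshold:
            return p
        last = p
    return last

# Classical worst-case rule when prices are known to lie in [m, M]:
# the reservation price sqrt(m * M) achieves a sqrt(M / m) competitive ratio.
def worst_case_threshold(m: float, M: float) -> float:
    return math.sqrt(m * M)

# A naive learning-augmented rule (illustrative only): blend the worst-case
# threshold with a prediction p_hat of the maximum price, weighted by lam in [0, 1].
def prediction_threshold(p_hat: float, m: float, M: float, lam: float) -> float:
    return max(m, min(M, lam * worst_case_threshold(m, M) + (1.0 - lam) * p_hat))

if __name__ == "__main__":
    prices = [2.0, 3.5, 7.0, 4.0, 1.5]   # revealed one at a time
    m, M = 1.0, 10.0
    print(one_max_search(prices, worst_case_threshold(m, M)))             # worst-case rule
    print(one_max_search(prices, prediction_threshold(6.5, m, M, 0.3)))   # with a prediction
```

The trade-off studied in the paper can be read off such rules: putting more weight on the prediction (smaller lam here) improves the profit when the prediction is accurate (consistency), at the risk of a worse worst-case guarantee (robustness).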
- Bandit Optimal Transport. Lorenzo Croissant. Feb 2025.
Despite the impressive progress in statistical Optimal Transport (OT) in recent years, there has been little interest in the study of the sequential learning of OT. Surprisingly so, as this problem is both practically motivated and a challenging extension of existing settings such as linear bandits. This article considers (for the first time) the stochastic bandit problem of learning to solve generic Kantorovich and entropic OT problems from repeated interactions when the marginals are known but the cost is unknown. We provide $\tilde{O}(\sqrt{T})$ regret algorithms for both problems by extending linear bandits on Hilbert spaces. These results provide a reduction to infinite-dimensional linear bandits. To deal with the dimension, we provide a method to exploit the intrinsic regularity of the cost to learn, yielding corresponding regret bounds which interpolate between $\tilde{O}(\sqrt{T})$ and $\tilde{O}(T)$.
@misc{Croissant_BOT_25,
  title = {Bandit {Optimal} {Transport}},
  author = {Croissant, Lorenzo},
  month = feb,
  year = {2025},
  keywords = {Bandit Algorithms, Optimal transport},
}
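To make the interaction loop concrete, here is a minimal sketch, under assumptions not taken from the paper, of a naive plug-in learner for the entropic problem: it estimates the unknown cost from noisy bandit observations by simple averaging and re-plans with Sinkhorn iterations on the current estimate. The noise model, the sampling rule, and every parameter value are illustrative; the paper's algorithms instead extend linear bandits on Hilbert spaces and carry regret guarantees this toy lacks.

```python
import numpy as np

def sinkhorn(cost: np.ndarray, a: np.ndarray, b: np.ndarray,
             eps: float = 0.05, n_iter: int = 200) -> np.ndarray:
    """Entropy-regularised OT plan between marginals a and b for a given cost matrix."""
    K = np.exp(-cost / eps)
    u, v = np.ones_like(a), np.ones_like(b)
    for _ in range(n_iter):
        u = a / (K @ v)
        v = b / (K.T @ u)
    return u[:, None] * K * v[None, :]

rng = np.random.default_rng(0)
n = 5
a = np.full(n, 1.0 / n)                  # known source marginal
b = np.full(n, 1.0 / n)                  # known target marginal
true_cost = rng.uniform(size=(n, n))     # unknown to the learner

# Plug-in loop (illustrative only): sample a pair from the current plan,
# observe a noisy cost, update a running average, and re-plan.
sums = np.zeros((n, n))
counts = np.ones((n, n))                 # pseudo-counts avoid division by zero
for t in range(1000):
    plan = sinkhorn(sums / counts, a, b)
    p = plan.ravel() / plan.sum()
    i, j = np.unravel_index(rng.choice(n * n, p=p), (n, n))
    sums[i, j] += true_cost[i, j] + 0.1 * rng.normal()   # noisy bandit feedback
    counts[i, j] += 1

plan_hat = sinkhorn(sums / counts, a, b)
print("plan value with estimated cost:", float(np.sum(plan_hat * true_cost)))
print("plan value with the true cost: ", float(np.sum(sinkhorn(true_cost, a, b) * true_cost)))
```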
2024
- Near-continuous time Reinforcement Learning for continuous state-action spaces. Lorenzo Croissant, Marc Abeille, and Bruno Bouchard. In Proceedings of the 35th International Conference on Algorithmic Learning Theory, 25–28 Feb 2024.
We consider the reinforcement learning problem of controlling an unknown dynamical system to maximise the long-term average reward along a single trajectory. Most of the literature considers system interactions that occur in discrete time and discrete state-action spaces. Although this standpoint is suitable for games, it is often inadequate for systems in which interactions occur at a high frequency, if not in continuous time, or those whose state spaces are large if not inherently continuous. Perhaps the only exception is the linear quadratic framework, for which results exist both in discrete and continuous time. However, its ability to handle continuous states comes with the drawback of a rigid dynamic and reward structure. This work aims to overcome these shortcomings by modelling interaction times with a Poisson clock of frequency $\varepsilon^{-1}$, which captures arbitrary time scales from discrete ($\varepsilon=1$) to continuous time ($\varepsilon\downarrow 0$). In addition, we consider a generic reward function and model the state dynamics according to a jump process with an arbitrary transition kernel on $\mathbb{R}^d$. We show that the celebrated optimism protocol applies when the sub-tasks (learning and planning) can be performed effectively. We tackle learning by extending the eluder dimension framework and propose an approximate planning method based on a diffusive limit ($\varepsilon\downarrow 0$) approximation of the jump process. Overall, our algorithm enjoys a regret of order $\tilde{\mathcal{O}}(\sqrt{T})$, or $\tilde{\mathcal{O}}(\varepsilon^{1/2} T+\sqrt{T})$ with the approximate planning. As the frequency of interactions blows up, the approximation error $\varepsilon^{1/2} T$ vanishes, showing that $\tilde{\mathcal{O}}(\sqrt{T})$ is attainable in near-continuous time.
@inproceedings{CAB23,
  author = {Croissant, Lorenzo and Abeille, Marc and Bouchard, Bruno},
  title = {Near-continuous time {Reinforcement} {Learning} for continuous state-action spaces},
  pages = {444-498},
  booktitle = {Proceedings of the {35th} International Conference on Algorithmic Learning Theory},
  year = {2024},
  series = {Proceedings of Machine Learning Research},
  month = {25--28 Feb},
  publisher = {PMLR},
}
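The interaction model in this paper is straightforward to simulate. The sketch below is an illustrative toy, with made-up dynamics, policy, and reward, meant only to show the timing structure: decision epochs arrive on a Poisson clock of rate 1/eps, so eps = 1 behaves like unit-step discrete time while eps -> 0 approaches continuous time. The jump scaling used here is an assumption chosen for illustration, not the paper's model.

```python
import numpy as np

def simulate(eps: float, horizon: float, seed: int = 0) -> float:
    """Run a toy controlled jump process observed at Poisson(1/eps) event times."""
    rng = np.random.default_rng(seed)
    t, x, total_reward = 0.0, 0.0, 0.0
    while True:
        dt = rng.exponential(eps)            # inter-arrival time with mean eps
        if t + dt > horizon:
            break
        t += dt
        a = -np.sign(x)                      # toy policy: push the state back towards 0
        # Toy jump: drift of order eps, fluctuation of order sqrt(eps) (illustrative scaling).
        x += eps * a + np.sqrt(eps) * rng.normal()
        total_reward += dt * (-x ** 2)       # running reward accumulated between events
    return total_reward

for eps in (1.0, 0.1, 0.01):                 # from discrete-like to near-continuous time
    print(eps, simulate(eps, horizon=100.0))
```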
2023
- Diffusion limit control and reinforcement learning, with applications to online auctions. Lorenzo Croissant. PhD thesis, Université Paris-Dauphine, Université PSL, 2023.
We consider the diffusive limit of a generic pure-jump Markov control problem as the intensity of the driving Poisson process tends to infinity. We quantify the convergence speed in terms of the Hölder exponent of the Hessian of the limit problem. This approximation provides an efficient alternative method for the numerical resolution of these problems when the intensity of jumps is large. Considering the control of unknown systems, we extend this approach to the context of online Reinforcement Learning. Under the optimism-in-the-face-of-uncertainty (OFU) paradigm, we leverage the eluder dimension framework for learning and the diffusive limit for approximate resolution of the planning sub-problem. Our algorithm extends existing theory from discrete processes to continuous states and actions. Our study of diffusion limit systems is motivated and illustrated by the bidding problem in a high-frequency online auction against a revenue-maximising seller.
@phdthesis{thesis,
  author = {Croissant, Lorenzo},
  school = {Université Paris-Dauphine, Université PSL},
  title = {Diffusion limit control and reinforcement learning, with applications to online auctions},
  year = {2023},
  url = {https://theses.hal.science/tel-04356751/},
}
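A back-of-the-envelope version of the diffusive-limit idea, stated as a generic heuristic rather than the thesis' exact model or assumptions: if jumps arrive at rate $\varepsilon^{-1}$ and each jump has conditional mean of order $\varepsilon$ and conditional covariance of order $\varepsilon$, a second-order Taylor expansion shows that the generator of the jump process converges to a diffusion generator as $\varepsilon \downarrow 0$.

```latex
% Generic heuristic (not the thesis' exact model): jumps arrive at rate 1/eps,
% with E[jump] = eps * b(x,a) and Cov[jump] = eps * Sigma(x,a).
\[
  \mathcal{L}^{\varepsilon}\varphi(x)
    = \frac{1}{\varepsilon}\,
      \mathbb{E}\bigl[\varphi\bigl(x + \Delta^{\varepsilon}(x,a)\bigr) - \varphi(x)\bigr]
    \;\xrightarrow[\varepsilon \downarrow 0]{}\;
    b(x,a)\cdot\nabla\varphi(x)
    + \tfrac{1}{2}\,\operatorname{Tr}\bigl[\Sigma(x,a)\,D^{2}\varphi(x)\bigr].
\]
```

The remainder of the Taylor expansion is controlled by the regularity of $D^{2}\varphi$, which is consistent with the convergence speed above being quantified through the Hölder exponent of the Hessian of the limit problem.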
- Reinforcement Learning in near-continuous time for continuous state-action spaces. Lorenzo Croissant, Marc Abeille, and Bruno Bouchard. In Sixteenth European Workshop on Reinforcement Learning, 2023.
We consider the Reinforcement Learning problem of controlling an unknown dynamical system to maximise the long-term average reward along a single trajectory. Most of the literature considers system interactions that occur in discrete time and discrete state-action spaces. Although this standpoint is suitable for games, it is often inadequate for mechanical or digital systems in which interactions occur at a high frequency, if not in continuous time, and whose state spaces are large if not inherently continuous. Perhaps the only exception is the Linear Quadratic framework, for which results exist both in discrete and continuous time. However, its ability to handle continuous states comes with the drawback of a rigid dynamic and reward structure. This work aims to overcome these shortcomings by modelling interaction times with a Poisson clock of frequency $\varepsilon^{-1}$, which captures arbitrary time scales: from discrete ($\varepsilon=1$) to continuous time ($\varepsilon\downarrow 0$). In addition, we consider a generic reward function and model the state dynamics according to a jump process with an arbitrary transition kernel on $\mathbb{R}^d$. We show that the celebrated optimism protocol applies when the sub-tasks (learning and planning) can be performed effectively. We tackle learning within the eluder dimension framework and propose an approximate planning method based on a diffusive limit approximation of the jump process. Overall, our algorithm enjoys a regret of order $\tilde{\mathcal{O}}(\varepsilon^{1/2} T+\sqrt{T})$. As the frequency of interactions blows up, the approximation error $\varepsilon^{1/2} T$ vanishes, showing that $\tilde{\mathcal{O}}(\sqrt{T})$ is attainable in near-continuous time.
@inproceedings{CAB23EWRL,
  title = {Reinforcement Learning in near-continuous time for continuous state-action spaces},
  author = {Croissant, Lorenzo and Abeille, Marc and Bouchard, Bruno},
  booktitle = {Sixteenth European Workshop on Reinforcement Learning},
  year = {2023},
  url = {https://openreview.net/forum?id=bqiRIs3fJNk},
}
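The "celebrated optimism protocol" invoked in both versions of this work follows a standard template: build a confidence set of plausible models, plan optimistically within it, act, and update. The sketch below instantiates that template on a placeholder problem (a Bernoulli bandit with a UCB-style confidence width); it is not the paper's algorithm, which uses eluder-dimension confidence sets and a diffusive-limit planner, and all numerical values are assumptions.

```python
import numpy as np

rng = np.random.default_rng(0)

# Placeholder environment: a Bernoulli bandit standing in for "model class + planner".
true_means = np.array([0.3, 0.5, 0.7])
n_arms = len(true_means)

counts = np.ones(n_arms)           # pseudo-counts avoid division by zero
sums = np.zeros(n_arms)

for t in range(1, 2001):
    # 1) Build a confidence set around the current estimates.
    means = sums / counts
    width = np.sqrt(2.0 * np.log(t + 1) / counts)
    # 2) Plan optimistically: act as if the most favourable plausible model were true.
    arm = int(np.argmax(means + width))
    # 3) Act and observe a reward.
    reward = float(rng.random() < true_means[arm])
    # 4) Update the statistics, which shrinks the confidence set around the chosen model.
    counts[arm] += 1
    sums[arm] += reward

print(counts)   # pulls should concentrate on the best arm (index 2)
```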
2022
- Diffusive limit approximation of pure jump optimal ergodic control problems. Marc Abeille, Bruno Bouchard, and Lorenzo Croissant. arXiv preprint, 2022.
Motivated by the design of fast reinforcement learning algorithms, we study the diffusive limit of a class of pure jump ergodic stochastic control problems. We show that, whenever the intensity of jumps is large enough, the approximation error is governed by the Hölder continuity of the Hessian matrix of the solution to the limit ergodic partial differential equation. This extends to this context the results of [1], obtained for finite horizon problems. We also explain how to construct a first-order error correction term under appropriate smoothness assumptions. Finally, we quantify the error induced by using the Markov control policy constructed from the numerical finite-difference scheme associated with the limit diffusive problem; this seems to be new in the literature and is of independent interest. This approach reduces the cost of numerical resolution very significantly.
@misc{ABC22,
  doi = {10.48550/ARXIV.2209.15284},
  author = {Abeille, Marc and Bouchard, Bruno and Croissant, Lorenzo},
  keywords = {Optimization and Control (math.OC), FOS: Mathematics},
  title = {Diffusive limit approximation of pure jump optimal ergodic control problems},
  publisher = {arXiv},
  year = {2022},
  copyright = {arXiv.org perpetual, non-exclusive license},
}
- Diffusive limit approximation of pure-jump optimal stochastic control problems. Marc Abeille, Bruno Bouchard, and Lorenzo Croissant. Journal of Optimization Theory and Applications, 2022.
We consider the diffusive limit of a typical pure-jump Markovian control problem as the intensity of the driving Poisson process tends to infinity. We show that the convergence speed is provided by the Hölder constant of the Hessian of the limit problem, and explain how correction terms can be constructed. This provides an efficient alternative method for the numerical approximation of the optimal control of a pure-jump problem in situations with a very high jump intensity. We illustrate this approach in the context of a display advertising auction problem.
@article{ABC21,
  title = {Diffusive limit approximation of pure-jump optimal stochastic control problems},
  author = {Abeille, Marc and Bouchard, Bruno and Croissant, Lorenzo},
  journal = {Journal of Optimization Theory and Applications},
  pages = {1--30},
  year = {2022},
  publisher = {Springer},
  doi = {10.1007/s10957-022-02135-7},
  url = {https://link.springer.com/article/10.1007/s10957-022-02135-7},
  keywords = {Optimization and Control (math.OC), FOS: Mathematics},
}
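The uncontrolled version of this limit is easy to check numerically. The sketch below uses a compensated and rescaled Poisson process as a stand-in for the pure-jump state process (a choice made for illustration, not the model of the paper): its mean and variance already match the Brownian limit at every intensity, while its skewness, equal to 1/sqrt(intensity * horizon), vanishes as the intensity of the driving Poisson process grows.

```python
import numpy as np

rng = np.random.default_rng(0)

def terminal_values(intensity: float, horizon: float, sigma: float, n_samples: int) -> np.ndarray:
    """Compensated, rescaled Poisson process at time `horizon`:
    X_T = sigma * (N_T - intensity * T) / sqrt(intensity), with N_T ~ Poisson(intensity * T)."""
    n_jumps = rng.poisson(intensity * horizon, size=n_samples)
    return sigma * (n_jumps - intensity * horizon) / np.sqrt(intensity)

# As the jump intensity grows, the law of X_T approaches N(0, sigma^2 * horizon):
# the variance matches at every intensity, while the skewness tends to zero.
for intensity in (10.0, 100.0, 10_000.0):
    x = terminal_values(intensity, horizon=1.0, sigma=1.0, n_samples=100_000)
    skew = np.mean((x - x.mean()) ** 3) / x.std() ** 3
    print(f"intensity={intensity:>8}: var={x.var():.3f}, skewness={skew:+.3f}")
```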
2020
- Real-Time Optimisation for Online Learning in Auctions. Lorenzo Croissant, Marc Abeille, and Clement Calauzenes. In Proceedings of the 37th International Conference on Machine Learning, 13–18 Jul 2020.
In display advertising, a small group of sellers and bidders face each other in up to $10^{12}$ auctions a day. In this context, revenue maximisation via monopoly price learning is a high-value problem for sellers. By nature, these auctions are online and produce a very high frequency stream of data. This results in a computational strain that requires algorithms to be real-time. Unfortunately, existing methods inherited from the batch setting suffer $O(\sqrt{t})$ time/memory complexity at each update, prohibiting their use. In this paper, we provide the first algorithm for online learning of monopoly prices in online auctions whose update is constant in time and memory.
@inproceedings{CAC20,
  title = {Real-Time Optimisation for Online Learning in Auctions},
  author = {Croissant, Lorenzo and Abeille, Marc and Calauzenes, Clement},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning},
  pages = {2217--2226},
  year = {2020},
  editor = {III, Hal Daumé and Singh, Aarti},
  volume = {119},
  series = {Proceedings of Machine Learning Research},
  month = {13--18 Jul},
  publisher = {PMLR},
}
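To illustrate what a constant time and memory update can look like in this setting, here is a minimal sketch with a made-up bid distribution: an exponential-weights learner over a fixed grid of candidate prices, updated once per observed auction at a cost proportional to the grid size and independent of the number of rounds. It is not the algorithm from the paper; the price grid, learning rate, and posted-price revenue model are assumptions for illustration only.

```python
import numpy as np

rng = np.random.default_rng(0)

# Candidate monopoly prices on a fixed grid; memory is O(K), independent of t.
prices = np.linspace(0.0, 1.0, 51)
log_weights = np.zeros_like(prices)
eta = 0.5                                    # learning rate (illustrative value)

def revenue(price, bid):
    """Revenue of posting `price` when the highest bid is `bid` (toy posted-price model)."""
    return price * (bid >= price)

total, n_rounds = 0.0, 50_000
for t in range(n_rounds):
    bid = rng.beta(2.0, 2.0)                 # made-up bid distribution
    probs = np.exp(log_weights - log_weights.max())
    probs /= probs.sum()
    posted = rng.choice(prices, p=probs)     # price posted in this auction
    total += revenue(posted, bid)
    # Observing the bid reveals the revenue of every candidate price, so the
    # exponential-weights update costs O(K) per round, constant in t.
    log_weights += eta * revenue(prices, bid)

print("average revenue:", total / n_rounds)
print("most weighted price:", prices[np.argmax(log_weights)])
```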