Scippy

SCIP

Solving Constraint Integer Programs

pub_bandit_exp3.h
Go to the documentation of this file.
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2/* */
3/* This file is part of the program and library */
4/* SCIP --- Solving Constraint Integer Programs */
5/* */
6/* Copyright (c) 2002-2024 Zuse Institute Berlin (ZIB) */
7/* */
8/* Licensed under the Apache License, Version 2.0 (the "License"); */
9/* you may not use this file except in compliance with the License. */
10/* You may obtain a copy of the License at */
11/* */
12/* http://www.apache.org/licenses/LICENSE-2.0 */
13/* */
14/* Unless required by applicable law or agreed to in writing, software */
15/* distributed under the License is distributed on an "AS IS" BASIS, */
16/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17/* See the License for the specific language governing permissions and */
18/* limitations under the License. */
19/* */
20/* You should have received a copy of the Apache-2.0 license */
21/* along with SCIP; see the file LICENSE. If not visit scipopt.org. */
22/* */
23/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24
25/**@file pub_bandit_exp3.h
26 * @ingroup PublicBanditMethods
27 * @brief public methods for Exp.3
28 * @author Gregor Hendel
29 */
30
31/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
32
33#ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_
34#define SRC_SCIP_PUB_BANDIT_EXP3_H_
35
36#include "scip/def.h"
37#include "scip/type_bandit.h"
38#include "scip/type_retcode.h"
39#include "scip/type_scip.h"
40
41#ifdef __cplusplus
42extern "C" {
43#endif
44
45/**@addtogroup PublicBanditMethods
46 *
47 * ## Exp.3
48 *
49 * Exp.3 is a randomized selection method for the multi-armed bandit problem.
50 *
51 * Exp.3 maintains a probability distribution
52 * according to which an action is drawn
53 * in every iteration.
54 * The probability distribution is a mixture between
55 * a uniform distribution and a softmax distribution
56 * based on the cumulative rewards of the actions.
57 * The weight of the uniform distribution in the mixture
58 * is controlled by the parameter \f$ \gamma \f$, i.e.,
59 * setting \f$ \gamma = 1\f$ uses a uniform distribution
60 * in every selection step.
61 * The cumulative reward for the actions can be
62 * fine-tuned by adding a general bias for all actions.
63 * The bias is given by the parameter \f$ \beta \f$.
64 *
65 * @{
66 */
67
68/** creates and resets an Exp.3 bandit algorithm using \p scip pointer */
69SCIP_EXPORT
71 SCIP* scip, /**< SCIP data structure */
72 SCIP_BANDIT** exp3, /**< pointer to store bandit algorithm */
73 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
74 SCIP_Real gammaparam, /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
75 SCIP_Real beta, /**< gain offset between 0 and 1 at every observation */
76 int nactions, /**< the positive number of actions for this bandit algorithm */
77 unsigned int initseed /**< initial seed for random number generation */
78 );
79
80/** set gamma parameter of Exp.3 bandit algorithm to increase weight of uniform distribution */
81SCIP_EXPORT
83 SCIP_BANDIT* exp3, /**< bandit algorithm */
84 SCIP_Real gammaparam /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
85 );
86
87/** set beta parameter of Exp.3 bandit algorithm to increase gain offset for actions that were not played */
88SCIP_EXPORT
90 SCIP_BANDIT* exp3, /**< bandit algorithm */
91 SCIP_Real beta /**< gain offset between 0 and 1 at every observation */
92 );
93
94/** returns probability to play an action */
95SCIP_EXPORT
97 SCIP_BANDIT* exp3, /**< bandit algorithm */
98 int action /**< index of the requested action */
99 );
100
101/** @}*/
102
103#ifdef __cplusplus
104}
105#endif
106
107#endif
common defines and data types used in all packages of SCIP
#define SCIP_Real
Definition: def.h:173
void SCIPsetGammaExp3(SCIP_BANDIT *exp3, SCIP_Real gammaparam)
Definition: bandit_exp3.c:337
SCIP_RETCODE SCIPcreateBanditExp3(SCIP *scip, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
Definition: bandit_exp3.c:311
void SCIPsetBetaExp3(SCIP_BANDIT *exp3, SCIP_Real beta)
Definition: bandit_exp3.c:350
SCIP_Real SCIPgetProbabilityExp3(SCIP_BANDIT *exp3, int action)
Definition: bandit_exp3.c:363
type definitions for bandit selection algorithms
type definitions for return codes for SCIP methods
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:63
type definitions for SCIP's main datastructure