Scippy

SCIP

Solving Constraint Integer Programs

pub_bandit_exp3.h
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright 2002-2022 Zuse Institute Berlin */
7 /* */
8 /* Licensed under the Apache License, Version 2.0 (the "License"); */
9 /* you may not use this file except in compliance with the License. */
10 /* You may obtain a copy of the License at */
11 /* */
12 /* http://www.apache.org/licenses/LICENSE-2.0 */
13 /* */
14 /* Unless required by applicable law or agreed to in writing, software */
15 /* distributed under the License is distributed on an "AS IS" BASIS, */
16 /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17 /* See the License for the specific language governing permissions and */
18 /* limitations under the License. */
19 /* */
20 /* You should have received a copy of the Apache-2.0 license */
21 /* along with SCIP; see the file LICENSE. If not visit scipopt.org. */
22 /* */
23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 
25 /**@file pub_bandit_exp3.h
26  * @ingroup PublicBanditMethods
27  * @brief public methods for Exp.3
28  * @author Gregor Hendel
29  */
30 
31 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
32 
33 #ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_
34 #define SRC_SCIP_PUB_BANDIT_EXP3_H_
35 
36 #include "scip/def.h"
37 #include "scip/type_bandit.h"
38 #include "scip/type_retcode.h"
39 #include "scip/type_scip.h"
40 
41 #ifdef __cplusplus
42 extern "C" {
43 #endif
44 
45 /**@addtogroup PublicBanditMethods
46  *
47  * ## Exp.3
48  *
49  * Exp.3 is a randomized selection method for the multi-armed bandit problem.
50  *
51  * Exp.3 maintains a probability distribution
52  * according to which an action is drawn
53  * in every iteration.
54  * The probability distribution is a mixture between
55  * a uniform distribution and a softmax distribution
56  * based on the cumulative rewards of the actions.
57  * The weight of the uniform distribution in the mixture
58  * is controlled by the parameter \f$ \gamma \f$, i.e.,
59  * setting \f$ \gamma = 1\f$ uses a uniform distribution
60  * in every selection step.
61  * The cumulative reward for the actions can be
62  * fine-tuned by adding a general bias for all actions.
63  * The bias is given by the parameter \f$ \beta \f$.
64  *
65  * @{
66  */
67 
68 /** creates and resets an Exp.3 bandit algorithm using \p scip pointer */
69 SCIP_EXPORT
70 SCIP_RETCODE SCIPcreateBanditExp3(
71  SCIP* scip, /**< SCIP data structure */
72  SCIP_BANDIT** exp3, /**< pointer to store bandit algorithm */
73  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
74  SCIP_Real gammaparam, /**< weight between uniform (gamma ~ 1) and weight-driven (gamma ~ 0) probability distribution */
75  SCIP_Real beta, /**< gain offset between 0 and 1 at every observation */
76  int nactions, /**< the positive number of actions for this bandit algorithm */
77  unsigned int initseed /**< initial seed for random number generation */
78  );
79 
80 /** set gamma parameter of Exp.3 bandit algorithm to increase weight of uniform distribution */
81 SCIP_EXPORT
82 void SCIPsetGammaExp3(
83  SCIP_BANDIT* exp3, /**< bandit algorithm */
84  SCIP_Real gammaparam /**< weight between uniform (gamma ~ 1) and weight-driven (gamma ~ 0) probability distribution */
85  );
86 
87 /** set beta parameter of Exp.3 bandit algorithm to increase gain offset for actions that were not played */
88 SCIP_EXPORT
89 void SCIPsetBetaExp3(
90  SCIP_BANDIT* exp3, /**< bandit algorithm */
91  SCIP_Real beta /**< gain offset between 0 and 1 at every observation */
92  );
93 
94 /** returns probability to play an action */
95 SCIP_EXPORT
96 SCIP_Real SCIPgetProbabilityExp3(
97  SCIP_BANDIT* exp3, /**< bandit algorithm */
98  int action /**< index of the requested action */
99  );
100 
101 /** @}*/
102 
103 #ifdef __cplusplus
104 }
105 #endif
106 
107 #endif
SCIP_RETCODE SCIPcreateBanditExp3(SCIP *scip, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
Definition: bandit_exp3.c:311
void SCIPsetBetaExp3(SCIP_BANDIT *exp3, SCIP_Real beta)
Definition: bandit_exp3.c:350
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:63
type definitions for return codes for SCIP methods
type definitions for SCIP's main datastructure
type definitions for bandit selection algorithms
SCIP_Real SCIPgetProbabilityExp3(SCIP_BANDIT *exp3, int action)
Definition: bandit_exp3.c:363
void SCIPsetGammaExp3(SCIP_BANDIT *exp3, SCIP_Real gammaparam)
Definition: bandit_exp3.c:337
#define SCIP_Real
Definition: def.h:186
common defines and data types used in all packages of SCIP