Toggle navigation
SCIP Optimization Suite
SCIP
SoPlex
ZIMPL
UG
GCG
Documentation
SCIP 9.2.0
SCIP 8.1.0
SCIP 7.0.3
SCIP 6.0.2
SCIP 5.0.1
SCIP 4.0.1
SCIP 3.2.1
SCIP
Solving Constraint Integer Programs
pub_bandit_exp3.h
Go to the documentation of this file.
1
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2
/* */
3
/* This file is part of the program and library */
4
/* SCIP --- Solving Constraint Integer Programs */
5
/* */
6
/* Copyright (C) 2002-2018 Konrad-Zuse-Zentrum */
7
/* fuer Informationstechnik Berlin */
8
/* */
9
/* SCIP is distributed under the terms of the ZIB Academic License. */
10
/* */
11
/* You should have received a copy of the ZIB Academic License */
12
/* along with SCIP; see the file COPYING. If not email to scip@zib.de. */
13
/* */
14
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15
16
/**@file pub_bandit_exp3.h
17
* @ingroup PublicBanditMethods
18
* @brief public methods for Exp.3
19
* @author Gregor Hendel
20
*/
21
22
/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
23
24
#ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_
25
#define SRC_SCIP_PUB_BANDIT_EXP3_H_
26
27
#include "
scip/scip.h
"
28
29
#ifdef __cplusplus
30
extern
"C"
{
31
#endif
32
33
/**@addtogroup PublicBanditMethods
34
*
35
* ## Exp.3
36
*
37
* Exp.3 is a randomized selection method for the multi-armed bandit problem.
38
*
39
* Exp.3 maintains a probability distribution
40
* according to which an action is drawn
41
* in every iteration.
42
* The probability distribution is a mixture between
43
* a uniform distribution and a softmax distribution
44
* based on the cumulative rewards of the actions.
45
* The weight of the uniform distribution in the mixture
46
* is controlled by the parameter \f$ \gamma \f$, i.e.,
47
* setting \f$ \gamma = 1\f$ uses a uniform distribution
48
* in every selection step.
49
* The cumulative reward for the actions can be
50
* fine-tuned by adding a general bias for all actions.
51
* The bias is given by the parameter \f$ \beta \f$.
52
*
53
* @{
54
*/
55
56
/** creates and resets an Exp.3 bandit algorithm using \p scip pointer */
57
extern
58
SCIP_RETCODE
SCIPcreateBanditExp3
(
59
SCIP
*
scip
,
/**< SCIP data structure */
60
SCIP_BANDIT
** exp3,
/**< pointer to store bandit algorithm */
61
SCIP_Real
* priorities,
/**< nonnegative priorities for each action, or NULL if not needed */
62
SCIP_Real
gammaparam,
/**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
63
SCIP_Real
beta,
/**< gain offset between 0 and 1 at every observation */
64
int
nactions,
/**< the positive number of actions for this bandit algorithm */
65
unsigned
int
initseed
/**< initial seed for random number generation */
66
);
67
68
/** set gamma parameter of Exp.3 bandit algorithm to increase weight of uniform distribution */
69
extern
70
void
SCIPsetGammaExp3
(
71
SCIP_BANDIT
* exp3,
/**< bandit algorithm */
72
SCIP_Real
gammaparam
/**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
73
);
74
75
/** set beta parameter of Exp.3 bandit algorithm to increase gain offset for actions that were not played */
76
extern
77
void
SCIPsetBetaExp3
(
78
SCIP_BANDIT
* exp3,
/**< bandit algorithm */
79
SCIP_Real
beta
/**< gain offset between 0 and 1 at every observation */
80
);
81
82
/** returns probability to play an action */
83
extern
84
SCIP_Real
SCIPgetProbabilityExp3
(
85
SCIP_BANDIT
* exp3,
/**< bandit algorithm */
86
int
action
/**< index of the requested action */
87
);
88
89
/** @}*/
90
91
#ifdef __cplusplus
92
}
93
#endif
94
95
#endif
SCIPcreateBanditExp3
SCIP_RETCODE SCIPcreateBanditExp3(SCIP *scip, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
Definition:
bandit_exp3.c:299
Scip
Definition:
struct_scip.h:58
SCIPsetBetaExp3
void SCIPsetBetaExp3(SCIP_BANDIT *exp3, SCIP_Real beta)
Definition:
bandit_exp3.c:338
SCIP_RETCODE
enum SCIP_Retcode SCIP_RETCODE
Definition:
type_retcode.h:53
SCIP_Bandit
Definition:
struct_bandit.h:48
SCIPgetProbabilityExp3
SCIP_Real SCIPgetProbabilityExp3(SCIP_BANDIT *exp3, int action)
Definition:
bandit_exp3.c:351
SCIPsetGammaExp3
void SCIPsetGammaExp3(SCIP_BANDIT *exp3, SCIP_Real gammaparam)
Definition:
bandit_exp3.c:325
SCIP_Real
#define SCIP_Real
Definition:
def.h:149
scip
Definition:
objbranchrule.h:33
scip.h
SCIP callable library.