28 #define BANDIT_NAME "eps-greedy" 29 #define DEFAULT_WEIGHT 0.2 35 struct SCIP_BanditData
52 assert(bandit != NULL);
55 assert(banditdata != NULL);
56 assert(banditdata->weights != NULL);
75 assert(bandit != NULL);
76 assert(selection != NULL);
79 assert(banditdata != NULL);
89 banditdata->nselections++;
93 if( randnr >= curreps )
99 assert(weights != NULL);
102 maxreward = weights[0];
106 for( j = 1; j < nactions; ++j )
110 if( maxreward < reward )
131 assert(bandit != NULL);
134 assert(banditdata != NULL);
137 banditdata->weights[selection] *= 0.9;
138 banditdata->weights[selection] += 0.1 * score;
151 assert(bandit != NULL);
154 assert(banditdata != NULL);
156 weights = banditdata->weights;
158 assert(weights != NULL);
159 assert(nactions > 0);
162 if( priorities != NULL )
168 priosum = priorities[0];
169 for( w = 1; w < nactions; ++w )
171 assert(priorities[w] >= 0);
172 priosum += priorities[w];
175 if( priosum == 0 || priosum == nactions)
178 for( w = 0; w < nactions; ++w )
187 for( w = 0; w < nactions; ++w )
188 weights[w] = priorities[w] * normalization;
194 for( w = 0; w < nactions; ++w )
198 banditdata->nselections = 0;
216 unsigned int initseed
222 assert(banditdata != NULL);
226 banditdata->eps =
eps;
227 banditdata->nselections = 0;
241 unsigned int initseed
245 assert(scip != NULL);
246 assert(epsgreedy != NULL);
267 assert(epsgreedy != NULL);
269 assert(banditdata != NULL);
271 return banditdata->weights;
281 assert(epsgreedy != NULL);
286 banditdata->eps =
eps;
298 SCIPbanditFreeEpsgreedy, SCIPbanditSelectEpsgreedy, SCIPbanditUpdateEpsgreedy, SCIPbanditResetEpsgreedy) );
SCIP_RETCODE SCIPbanditCreateEpsgreedy(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, int nactions, unsigned int initseed)
void SCIPsetEpsilonEpsgreedy(SCIP_BANDIT *epsgreedy, SCIP_Real eps)
enum SCIP_Retcode SCIP_RETCODE
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
int SCIPrandomGetInt(SCIP_RANDNUMGEN *randnumgen, int minrandval, int maxrandval)
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
SCIPInterval sqrt(const SCIPInterval &x)
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
#define BMSfreeBlockMemory(mem, ptr)
#define BMSallocBlockMemoryArray(mem, ptr, num)
#define BMSfreeBlockMemoryArray(mem, ptr, num)
SCIP_RETCODE SCIPcreateBanditEpsgreedy(SCIP *scip, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, int nactions, unsigned int initseed)
unsigned int SCIPinitializeRandomSeed(SCIP *scip, int initialseedvalue)
SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
struct SCIP_BanditData SCIP_BANDITDATA
SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)), SCIP_DECL_BANDITRESET((*banditreset)))
SCIP_Real * SCIPgetWeightsEpsgreedy(SCIP_BANDIT *epsgreedy)
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
SCIP_RETCODE SCIPincludeBanditvtableEpsgreedy(SCIP *scip)
internal methods for epsilon greedy bandit selection
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
#define BMSallocBlockMemory(mem, ptr)
struct BMS_BlkMem BMS_BLKMEM
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
memory allocation routines