27 #define BANDIT_NAME "exp3" 35 struct SCIP_BanditData
57 assert(bandit != NULL);
60 assert(banditdata != NULL);
87 assert(bandit != NULL);
88 assert(selection != NULL);
91 assert(banditdata != NULL);
101 oneminusgamma = 1 - banditdata->gamma;
102 gammaoverk = banditdata->gamma / (
SCIP_Real)nactions;
103 weightsum = banditdata->weightsum;
104 weights = banditdata->weights;
112 for( i = 0; i < nactions - 1; ++i )
117 prob = oneminusgamma * weights[i] / weightsum + gammaoverk;
145 assert(bandit != NULL);
148 assert(banditdata != NULL);
151 assert(selection >= 0);
152 assert(selection < nactions);
157 beta = banditdata->beta;
158 oneminusgamma = 1.0 - banditdata->gamma;
159 gammaoverk = banditdata->gamma * eta;
160 weights = banditdata->weights;
161 weightsum = banditdata->weightsum;
162 newweightsum = weightsum;
168 probai = oneminusgamma * weights[selection] / weightsum + gammaoverk;
170 assert(probai > 0.0);
172 gainestim = score / probai;
173 newweightsum -= weights[selection];
174 weights[selection] *=
exp(eta * gainestim);
175 newweightsum += weights[selection];
183 for( j = 0; j < nactions; ++j )
186 probaj = oneminusgamma * weights[j] / weightsum + gammaoverk;
188 assert(probaj > 0.0);
192 gainestim = (score + beta) / probaj;
194 gainestim = beta / probaj;
196 weights[j] *=
exp(eta * gainestim);
197 newweightsum += weights[j];
201 banditdata->weightsum = newweightsum;
214 assert(bandit != NULL);
217 assert(banditdata != NULL);
219 weights = banditdata->weights;
221 assert(nactions > 0);
226 if( priorities != NULL )
233 for( i = 0; i < nactions; ++i )
235 assert(priorities[i] >= 0);
236 priosum += priorities[i];
242 normalization = nactions / priosum;
243 for( i = 0; i < nactions; ++i )
244 weights[i] = (priorities[i] * normalization) +
NUMTOL;
249 for( i = 0; i < nactions; ++i )
250 weights[i] = 1.0 +
NUMTOL;
256 for( i = 0; i < nactions; ++i )
257 weights[i] = 1.0 +
NUMTOL;
278 unsigned int initseed
284 assert(banditdata != NULL);
286 banditdata->gamma = gammaparam;
287 banditdata->beta = beta;
288 assert(gammaparam >= 0 && gammaparam <= 1);
289 assert(beta >= 0 && beta <= 1);
306 unsigned int initseed
332 assert(gammaparam >= 0 && gammaparam <= 1);
334 banditdata->gamma = gammaparam;
345 assert(beta >= 0 && beta <= 1);
347 banditdata->beta = beta;
358 assert(banditdata->weightsum > 0.0);
361 return (1.0 - banditdata->gamma) * banditdata->weights[action] / banditdata->weightsum + banditdata->gamma / (
SCIP_Real)
SCIPbanditGetNActions(exp3);
372 SCIPbanditFreeExp3, SCIPbanditSelectExp3, SCIPbanditUpdateExp3, SCIPbanditResetExp3) );
373 assert(vtable != NULL);
SCIP_RETCODE SCIPcreateBanditExp3(SCIP *scip, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateExp3)
void SCIPsetBetaExp3(SCIP_BANDIT *exp3, SCIP_Real beta)
enum SCIP_Retcode SCIP_RETCODE
SCIPInterval exp(const SCIPInterval &x)
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
SCIP_RETCODE SCIPbanditCreateExp3(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
SCIP_DECL_BANDITSELECT(SCIPbanditSelectExp3)
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
SCIP_DECL_BANDITRESET(SCIPbanditResetExp3)
SCIP_DECL_BANDITFREE(SCIPbanditFreeExp3)
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
#define BMSfreeBlockMemory(mem, ptr)
SCIP_RETCODE SCIPincludeBanditvtableExp3(SCIP *scip)
SCIP_Real SCIPgetProbabilityExp3(SCIP_BANDIT *exp3, int action)
#define BMSallocBlockMemoryArray(mem, ptr, num)
#define BMSfreeBlockMemoryArray(mem, ptr, num)
unsigned int SCIPinitializeRandomSeed(SCIP *scip, int initialseedvalue)
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
struct SCIP_BanditData SCIP_BANDITDATA
internal methods for Exp.3 bandit algorithm
void SCIPsetGammaExp3(SCIP_BANDIT *exp3, SCIP_Real gammaparam)
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)), SCIP_DECL_BANDITRESET((*banditreset)))
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
#define BMSallocBlockMemory(mem, ptr)
struct BMS_BlkMem BMS_BLKMEM
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)