28 #define BANDIT_NAME "ucb" 36 struct SCIP_BanditData
63 assert(bufmem != NULL);
70 banditdata->nselections = 0;
76 for( i = 0; i < nactions; ++i )
77 banditdata->startperm[i] = i;
80 if( priorities != NULL )
87 for( i = 0; i < nactions; ++i )
114 assert(bandit != NULL);
117 assert(banditdata != NULL);
138 assert(bandit != NULL);
139 assert(selection != NULL);
142 assert(banditdata != NULL);
145 counter = banditdata->counter;
147 if( banditdata->nselections < nactions )
149 *selection = banditdata->startperm[banditdata->nselections];
150 assert(counter[*selection] == 0);
160 meanscores = banditdata->meanscores;
163 assert(meanscores != NULL);
167 widthfactor = banditdata->alpha * LOG1P((
SCIP_Real)banditdata->nselections);
168 widthfactor =
sqrt(widthfactor);
176 for( i = 0; i < nactions; ++i )
180 assert(counter[i] > 0);
183 uppercb = meanscores[i];
185 uppercb += widthfactor / rootcount;
197 assert(*selection >= 0);
198 assert(*selection < nactions);
209 assert(bandit != NULL);
212 assert(banditdata != NULL);
213 assert(selection >= 0);
217 delta = score - banditdata->meanscores[selection];
218 ++banditdata->counter[selection];
219 banditdata->meanscores[selection] += delta / (
SCIP_Real)banditdata->counter[selection];
221 banditdata->nselections++;
232 assert(bufmem != NULL);
233 assert(bandit != NULL);
236 assert(banditdata != NULL);
262 assert(action < nactions);
265 if( banditdata->nselections < nactions )
269 assert(banditdata->counter[action] > 0);
270 uppercb = banditdata->meanscores[action];
273 uppercb +=
sqrt(banditdata->alpha * LOG1P((
SCIP_Real)banditdata->nselections) / (
SCIP_Real)banditdata->counter[action]);
285 assert(banditdata != NULL);
287 return banditdata->startperm;
299 unsigned int initseed
306 SCIPerrorMessage(
"UCB requires nonnegative alpha parameter, have %f\n", alpha);
311 assert(banditdata != NULL);
317 banditdata->alpha = alpha;
331 unsigned int initseed
357 SCIPbanditFreeUcb, SCIPbanditSelectUcb, SCIPbanditUpdateUcb, SCIPbanditResetUcb) );
358 assert(vtable != NULL);
SCIP_RETCODE SCIPcreateBanditUcb(SCIP *scip, SCIP_BANDIT **ucb, SCIP_Real *priorities, SCIP_Real alpha, int nactions, unsigned int initseed)
enum SCIP_Retcode SCIP_RETCODE
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
void SCIPsortDownRealInt(SCIP_Real *realarray, int *intarray, int len)
#define BMSduplicateBufferMemoryArray(mem, ptr, source, num)
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
internal methods for UCB bandit algorithm
SCIPInterval sqrt(const SCIPInterval &x)
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateUcb)
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
SCIP_DECL_BANDITRESET(SCIPbanditResetUcb)
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
#define BMSfreeBlockMemory(mem, ptr)
SCIP_RETCODE SCIPincludeBanditvtableUcb(SCIP *scip)
#define BMSallocBlockMemoryArray(mem, ptr, num)
SCIP_DECL_BANDITSELECT(SCIPbanditSelectUcb)
#define BMSfreeBlockMemoryArray(mem, ptr, num)
void SCIPrandomPermuteIntArray(SCIP_RANDNUMGEN *randnumgen, int *array, int begin, int end)
int * SCIPgetStartPermutationUcb(SCIP_BANDIT *ucb)
unsigned int SCIPinitializeRandomSeed(SCIP *scip, int initialseedvalue)
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
struct SCIP_BanditData SCIP_BANDITDATA
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)), SCIP_DECL_BANDITRESET((*banditreset)))
SCIP_RETCODE SCIPbanditCreateUcb(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **ucb, SCIP_Real *priorities, SCIP_Real alpha, int nactions, unsigned int initseed)
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
static SCIP_RETCODE dataReset(BMS_BUFMEM *bufmem, SCIP_BANDIT *ucb, SCIP_BANDITDATA *banditdata, SCIP_Real *priorities, int nactions)
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
#define BMSallocBlockMemory(mem, ptr)
#define BMSclearMemoryArray(ptr, num)
struct BMS_BlkMem BMS_BLKMEM
SCIP_Real SCIPgetConfidenceBoundUcb(SCIP_BANDIT *ucb, int action)
#define BMSfreeBufferMemoryArray(mem, ptr)
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
SCIP_DECL_BANDITFREE(SCIPbanditFreeUcb)
memory allocation routines