Scippy

SCIP

Solving Constraint Integer Programs

bandit_epsgreedy.c
Go to the documentation of this file.
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2/* */
3/* This file is part of the program and library */
4/* SCIP --- Solving Constraint Integer Programs */
5/* */
6/* Copyright (c) 2002-2024 Zuse Institute Berlin (ZIB) */
7/* */
8/* Licensed under the Apache License, Version 2.0 (the "License"); */
9/* you may not use this file except in compliance with the License. */
10/* You may obtain a copy of the License at */
11/* */
12/* http://www.apache.org/licenses/LICENSE-2.0 */
13/* */
14/* Unless required by applicable law or agreed to in writing, software */
15/* distributed under the License is distributed on an "AS IS" BASIS, */
16/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17/* See the License for the specific language governing permissions and */
18/* limitations under the License. */
19/* */
20/* You should have received a copy of the Apache-2.0 license */
21/* along with SCIP; see the file LICENSE. If not visit scipopt.org. */
22/* */
23/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24
25/**@file bandit_epsgreedy.c
26 * @ingroup OTHER_CFILES
27 * @brief implementation of (a variant of) epsilon greedy bandit algorithm
28 * @author Gregor Hendel
29 */
30
31/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
32
33#include "scip/bandit.h"
35#include "scip/pub_bandit.h"
36#include "scip/pub_message.h"
37#include "scip/pub_misc.h"
38#include "scip/scip_bandit.h"
39#include "scip/scip_mem.h"
41
42#define BANDIT_NAME "eps-greedy"
43#define EPSGREEDY_SMALL 1e-6
44
45/*
46 * Data structures
47 */
48
49/** private data structure of epsilon greedy bandit algorithm */
50struct SCIP_BanditData
51{
52 SCIP_Real* weights; /**< weights for every action */
53 SCIP_Real* priorities; /**< saved priorities for tie breaking */
54 int* sels; /**< individual number of selections per action */
55 SCIP_Real eps; /**< epsilon parameter (between 0 and 1) to control epsilon greedy */
56 SCIP_Bool usemodification; /**< TRUE if modified eps greedy should be used */
57 SCIP_Real decayfactor; /**< the factor to reduce the weight of older observations if exponential decay is enabled */
58 int avglim; /**< nonnegative limit on observation number before the exponential decay starts,
59 * only relevant if exponential decay is enabled
60 */
61 int nselections; /**< counter for the number of selection calls */
62 SCIP_Bool preferrecent; /**< should the weights be updated in an exponentially decaying way? */
63};
64
65/*
66 * Callback methods of bandit algorithm virtual function table
67 */
68
69/** callback to free bandit specific data structures */
70SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
71{ /*lint --e{715}*/
72 SCIP_BANDITDATA* banditdata;
73 int nactions;
74
75 assert(bandit != NULL);
76
77 banditdata = SCIPbanditGetData(bandit);
78 assert(banditdata != NULL);
79 assert(banditdata->weights != NULL);
80 nactions = SCIPbanditGetNActions(bandit);
81
82 BMSfreeBlockMemoryArray(blkmem, &banditdata->weights, nactions);
83 BMSfreeBlockMemoryArray(blkmem, &banditdata->priorities, nactions);
84 BMSfreeBlockMemoryArray(blkmem, &banditdata->sels, nactions);
85 BMSfreeBlockMemory(blkmem, &banditdata);
86
87 SCIPbanditSetData(bandit, NULL);
88
89 return SCIP_OKAY;
90}
91
92/** selection callback for bandit algorithm */
93SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
94{ /*lint --e{715}*/
95 SCIP_BANDITDATA* banditdata;
96 SCIP_Real randnr;
97 SCIP_Real curreps;
98 SCIP_RANDNUMGEN* rng;
99 int nactions;
100 assert(bandit != NULL);
101 assert(selection != NULL);
102
103 banditdata = SCIPbanditGetData(bandit);
104 assert(banditdata != NULL);
105 rng = SCIPbanditGetRandnumgen(bandit);
106 assert(rng != NULL);
107
108 nactions = SCIPbanditGetNActions(bandit);
109
110 /* roll the dice to check if the best element should be picked, or an element at random */
111 randnr = SCIPrandomGetReal(rng, 0.0, 1.0);
112
113 /* make epsilon decrease with an increasing number of selections */
114 banditdata->nselections++;
115 curreps = banditdata->eps * sqrt((SCIP_Real)nactions/(SCIP_Real)banditdata->nselections);
116
117 /* select the best action seen so far */
118 if( randnr >= curreps )
119 {
120 SCIP_Real* weights = banditdata->weights;
121 SCIP_Real* priorities = banditdata->priorities;
122 int j;
123 SCIP_Real maxweight;
124
125 assert(weights != NULL);
126 assert(priorities != NULL);
127
128 /* pick the element with the largest reward */
129 maxweight = weights[0];
130 *selection = 0;
131
132 /* determine reward for every element */
133 for( j = 1; j < nactions; ++j )
134 {
135 SCIP_Real weight = weights[j];
136
137 /* select the action that maximizes the reward, breaking ties by action priorities */
138 if( maxweight < weight
139 || (weight >= maxweight - EPSGREEDY_SMALL && priorities[j] > priorities[*selection] ) )
140 {
141 *selection = j;
142 maxweight = weight;
143 }
144 }
145 }
146 else if( ! banditdata->usemodification ) /* use normal eps greedy */
147 {
148 /* play one of the actions at random */
149 *selection = SCIPrandomGetInt(rng, 0, nactions - 1);
150 }
151 else /* pick an action w.r.t. the distributions defined by its weights */
152 {
153 int j;
154 SCIP_Real sum;
155 SCIP_Real weightsum;
156 SCIP_Real* weights = banditdata->weights;
157
158 weightsum = 0.0;
159 for( j = 0; j < nactions; ++j )
160 weightsum += banditdata->weights[j];
161
162 /* pick a random number between 0.0 and sum of weights */
163 randnr = SCIPrandomGetReal(rng, 0.0, weightsum);
164
165 /* pick action w.r.t. the weights distribution */
166 sum = 0.0;
167 *selection = -1;
168 for( j = 0; j < nactions - 1; ++j )
169 {
170 sum += weights[j];
171
172 if( sum >= randnr )
173 {
174 *selection = j;
175 break;
176 }
177 }
178
179 if( *selection < 0 )
180 *selection = nactions - 1;
181 }
182
183 return SCIP_OKAY;
184}
185
186/** update callback for bandit algorithm */
187SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
188{ /*lint --e{715}*/
189 SCIP_BANDITDATA* banditdata;
190
191 assert(bandit != NULL);
192
193 banditdata = SCIPbanditGetData(bandit);
194 assert(banditdata != NULL);
195
196 /* increase the selection count */
197 ++banditdata->sels[selection];
198
199 /* the very first observation is directly stored as weight for both average or exponential decay */
200 if( banditdata->sels[selection] == 1 )
201 banditdata->weights[selection] = score;
202 else
203 {
204 /* use exponentially decreasing weights for older observations */
205 if( banditdata->preferrecent && banditdata->sels[selection] > banditdata->avglim )
206 {
207 /* decrease old weights by decay factor */
208 banditdata->weights[selection] *= banditdata->decayfactor;
209 banditdata->weights[selection] += (1.0 - banditdata->decayfactor) * score;
210 }
211 else
212 {
213 /* update average score */
214 SCIP_Real diff = score - banditdata->weights[selection];
215 banditdata->weights[selection] += diff / (SCIP_Real)(banditdata->sels[selection]);
216 }
217 }
218
219 return SCIP_OKAY;
220}
221
222/** reset callback for bandit algorithm */
223SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
224{ /*lint --e{715}*/
225 SCIP_BANDITDATA* banditdata;
226 SCIP_Real* weights;
227 int w;
228 int nactions;
229 SCIP_RANDNUMGEN* rng;
230
231 assert(bandit != NULL);
232
233 banditdata = SCIPbanditGetData(bandit);
234 assert(banditdata != NULL);
235
236 weights = banditdata->weights;
237 nactions = SCIPbanditGetNActions(bandit);
238 assert(weights != NULL);
239 assert(banditdata->priorities != NULL);
240 assert(nactions > 0);
241
242 rng = SCIPbanditGetRandnumgen(bandit);
243 assert(rng != NULL);
244
245 /* alter priorities slightly to make them unique */
246 if( priorities != NULL )
247 {
248 for( w = 1; w < nactions; ++w )
249 {
250 assert(priorities[w] >= 0);
251 banditdata->priorities[w] = priorities[w] + SCIPrandomGetReal(rng, -EPSGREEDY_SMALL, EPSGREEDY_SMALL);
252 }
253 }
254 else
255 {
256 /* use random priorities */
257 for( w = 0; w < nactions; ++w )
258 banditdata->priorities[w] = SCIPrandomGetReal(rng, 0.0, 1.0);
259 }
260
261 /* reset weights and selection counters to 0 */
262 BMSclearMemoryArray(weights, nactions);
263 BMSclearMemoryArray(banditdata->sels, nactions);
264
265 banditdata->nselections = 0;
266
267 return SCIP_OKAY;
268}
269
270/*
271 * interface methods of the Epsilon Greedy bandit algorithm
272 */
273
274/** internal method to create and reset epsilon greedy bandit algorithm */
276 BMS_BLKMEM* blkmem, /**< block memory */
277 BMS_BUFMEM* bufmem, /**< buffer memory */
278 SCIP_BANDITVTABLE* vtable, /**< virtual function table with epsilon greedy callbacks */
279 SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
280 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
281 SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
282 SCIP_Bool usemodification, /**< TRUE if modified eps greedy should be used */
283 SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
284 SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
285 int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
286 * only relevant if exponential decay is enabled */
287 int nactions, /**< the positive number of possible actions */
288 unsigned int initseed /**< initial random seed */
289 )
290{
291 SCIP_BANDITDATA* banditdata;
292
293 SCIP_ALLOC( BMSallocBlockMemory(blkmem, &banditdata) );
294 assert(banditdata != NULL);
295 assert(eps >= 0.0);
296
297 SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->weights, nactions) );
298 SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->priorities, nactions) );
299 SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->sels, nactions) );
300 banditdata->eps = eps;
301 banditdata->nselections = 0;
302 banditdata->usemodification = usemodification;
303 banditdata->preferrecent = preferrecent;
304 banditdata->decayfactor = decayfactor;
305 banditdata->avglim = avglim;
306
307 SCIP_CALL( SCIPbanditCreate(epsgreedy, vtable, blkmem, bufmem, priorities, nactions, initseed, banditdata) );
308
309 return SCIP_OKAY;
310}
311
312/** create and resets an epsilon greedy bandit algorithm */
314 SCIP* scip, /**< SCIP data structure */
315 SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
316 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
317 SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
318 SCIP_Bool usemodification, /**< TRUE if modified eps greedy should be used */
319 SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
320 SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
321 int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
322 * only relevant if exponential decay is enabled */
323 int nactions, /**< the positive number of possible actions */
324 unsigned int initseed /**< initial seed for random number generation */
325 )
326{
327 SCIP_BANDITVTABLE* vtable;
328 assert(scip != NULL);
329 assert(epsgreedy != NULL);
330
332 if( vtable == NULL )
333 {
334 SCIPerrorMessage("Could not find virtual function table for %s bandit algorithm\n", BANDIT_NAME);
335 return SCIP_INVALIDDATA;
336 }
337
339 priorities, eps, usemodification, preferrecent, decayfactor, avglim, nactions, SCIPinitializeRandomSeed(scip, initseed)) );
340
341 return SCIP_OKAY;
342}
343
344/** get weights array of epsilon greedy bandit algorithm */
346 SCIP_BANDIT* epsgreedy /**< epsilon greedy bandit algorithm */
347 )
348{
349 SCIP_BANDITDATA* banditdata;
350 assert(epsgreedy != NULL);
351 banditdata = SCIPbanditGetData(epsgreedy);
352 assert(banditdata != NULL);
353
354 return banditdata->weights;
355}
356
357/** set epsilon parameter of epsilon greedy bandit algorithm */
359 SCIP_BANDIT* epsgreedy, /**< epsilon greedy bandit algorithm */
360 SCIP_Real eps /**< parameter to increase probability for exploration between all actions */
361 )
362{
363 SCIP_BANDITDATA* banditdata;
364 assert(epsgreedy != NULL);
365 assert(eps >= 0);
366
367 banditdata = SCIPbanditGetData(epsgreedy);
368
369 banditdata->eps = eps;
370}
371
372
373/** creates the epsilon greedy bandit algorithm includes it in SCIP */
375 SCIP* scip /**< SCIP data structure */
376 )
377{
378 SCIP_BANDITVTABLE* banditvtable;
379
381 SCIPbanditFreeEpsgreedy, SCIPbanditSelectEpsgreedy, SCIPbanditUpdateEpsgreedy, SCIPbanditResetEpsgreedy) );
382
383 return SCIP_OKAY;
384}
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:200
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:42
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
Definition: bandit.c:190
internal methods for bandit algorithms
#define EPSGREEDY_SMALL
SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
SCIP_RETCODE SCIPincludeBanditvtableEpsgreedy(SCIP *scip)
SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
#define BANDIT_NAME
SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
SCIP_RETCODE SCIPbanditCreateEpsgreedy(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool usemodification, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
internal methods for epsilon greedy bandit selection
SCIP_VAR * w
Definition: circlepacking.c:67
#define NULL
Definition: def.h:267
#define SCIP_Bool
Definition: def.h:91
#define SCIP_ALLOC(x)
Definition: def.h:385
#define SCIP_Real
Definition: def.h:173
#define SCIP_CALL(x)
Definition: def.h:374
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
Definition: bandit.c:303
void SCIPsetEpsilonEpsgreedy(SCIP_BANDIT *epsgreedy, SCIP_Real eps)
SCIP_Real * SCIPgetWeightsEpsgreedy(SCIP_BANDIT *epsgreedy)
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
Definition: bandit.c:293
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
Definition: scip_bandit.c:80
SCIP_RETCODE SCIPcreateBanditEpsgreedy(SCIP *scip, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool usemodification, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)), SCIP_DECL_BANDITRESET((*banditreset)))
Definition: scip_bandit.c:48
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
Definition: scip_mem.c:72
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
Definition: misc.c:10130
int SCIPrandomGetInt(SCIP_RANDNUMGEN *randnumgen, int minrandval, int maxrandval)
Definition: misc.c:10108
unsigned int SCIPinitializeRandomSeed(SCIP *scip, unsigned int initialseedvalue)
#define BMSfreeBlockMemory(mem, ptr)
Definition: memory.h:465
#define BMSallocBlockMemory(mem, ptr)
Definition: memory.h:451
#define BMSallocBlockMemoryArray(mem, ptr, num)
Definition: memory.h:454
#define BMSfreeBlockMemoryArray(mem, ptr, num)
Definition: memory.h:467
#define BMSclearMemoryArray(ptr, num)
Definition: memory.h:130
struct BMS_BlkMem BMS_BLKMEM
Definition: memory.h:437
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
Definition: scip_mem.c:57
real eps
public methods for bandit algorithms
public methods for message output
#define SCIPerrorMessage
Definition: pub_message.h:64
public data structures and miscellaneous methods
public methods for bandit algorithms
public methods for memory management
public methods for random numbers
struct SCIP_BanditData SCIP_BANDITDATA
Definition: type_bandit.h:56
@ SCIP_INVALIDDATA
Definition: type_retcode.h:52
@ SCIP_OKAY
Definition: type_retcode.h:42
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:63