Coverage for /builds/hweiske/ase/ase/optimize/gpmin/gpmin.py: 69.92%

123 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-22 11:22 +0000

1import warnings 

2 

3import numpy as np 

4from scipy.optimize import minimize 

5 

6from ase.io.jsonio import write_json 

7from ase.optimize.gpmin.gp import GaussianProcess 

8from ase.optimize.gpmin.kernel import SquaredExponential 

9from ase.optimize.gpmin.prior import ConstantPrior 

10from ase.optimize.optimize import Optimizer 

11from ase.parallel import world 

12 

13 

class GPMin(Optimizer, GaussianProcess):
    def __init__(self, atoms, restart=None, logfile='-', trajectory=None,
                 prior=None, kernel=None, master=None, noise=None, weight=None,
                 scale=None, force_consistent=Optimizer._deprecated,
                 batch_size=None,
                 bounds=None, update_prior_strategy="maximum",
                 update_hyperparams=False, comm=world):
        """Optimize atomic positions using GPMin algorithm, which uses both
        potential energies and forces information to build a PES via Gaussian
        Process (GP) regression and then minimizes it.

        Default behaviour:
        --------------------
        The default values of the scale, noise, weight, batch_size and bounds
        parameters depend on the value of update_hyperparams. In order to get
        the default value of any of them, they should be set up to None.
        Default values are:

        update_hyperparams = True
            scale : 0.3
            noise : 0.004
            weight: 2.
            bounds: 0.1
            batch_size: 1

        update_hyperparams = False
            scale : 0.4
            noise : 0.001
            weight: 1.
            bounds: irrelevant
            batch_size: irrelevant

        Parameters:
        ------------------

        atoms: Atoms object
            The Atoms object to relax.

        restart: string
            JSON file used to store the training set. If set, file with
            such a name will be searched and the data in the file incorporated
            to the new training set, if the file exists.

        logfile: file object or str
            If *logfile* is a string, a file with that name will be opened.
            Use '-' for stdout

        trajectory: string
            File used to store trajectory of atomic movement.

        master: boolean
            Defaults to None, which causes only rank 0 to save files. If
            set to True, this rank will save files.

        prior: Prior object or None
            Prior for the GP regression of the PES surface
            See ase.optimize.gpmin.prior
            If *prior* is None, then it is set as the
            ConstantPrior with the constant being updated
            using the update_prior_strategy specified as a parameter

        kernel: Kernel object or None
            Kernel for the GP regression of the PES surface
            See ase.optimize.gpmin.kernel
            If *kernel* is None the SquaredExponential kernel is used.
            Note: It needs to be a kernel with derivatives!!!!!

        noise: float
            Regularization parameter for the Gaussian Process Regression.

        weight: float
            Prefactor of the Squared Exponential kernel.
            If *update_hyperparams* is False, changing this parameter
            has no effect on the dynamics of the algorithm.

        update_prior_strategy: string
            Strategy to update the constant from the ConstantPrior
            when more data is collected. It does only work when
            Prior = None

            options:
                'maximum': update the prior to the maximum sampled energy
                'init' : fix the prior to the initial energy
                'average': use the average of sampled energies as prior

        scale: float
            scale of the Squared Exponential Kernel

        update_hyperparams: boolean
            Update the scale of the Squared exponential kernel
            every batch_size-th iteration by maximizing the
            marginal likelihood.

        batch_size: int
            Number of new points in the sample before updating
            the hyperparameters.
            Only relevant if the optimizer is executed in update_hyperparams
            mode: (update_hyperparams = True)

        bounds: float, 0<bounds<1
            Set bounds to the optimization of the hyperparameters.
            Let t be a hyperparameter. Then it is optimized under the
            constraint (1-bound)*t_0 <= t <= (1+bound)*t_0
            where t_0 is the value of the hyperparameter in the previous
            step.
            If bounds is False, no constraints are set in the optimization of
            the hyperparameters.

        comm: Communicator object
            Communicator to handle parallel file reading and writing.

        .. warning:: The memory of the optimizer scales as O(n²N²) where
                     N is the number of atoms and n the number of steps.
                     If the number of atoms is sufficiently high, this
                     may cause a memory issue.
                     This class prints a warning if the user tries to
                     run GPMin with more than 100 atoms in the unit cell.
        """
        # Warn the user if the number of atoms is very large
        if len(atoms) > 100:
            warning = ('Possible Memory Issue. There are more than '
                       '100 atoms in the unit cell. The memory '
                       'of the process will increase with the number '
                       'of steps, potentially causing a memory issue. '
                       'Consider using a different optimizer.')

            warnings.warn(warning)

        # Give it default hyperparameters
        if update_hyperparams:  # Updated GPMin
            if scale is None:
                scale = 0.3
            if noise is None:
                noise = 0.004
            if weight is None:
                weight = 2.
            if bounds is None:
                self.eps = 0.1
            elif bounds is False:
                self.eps = None
            else:
                self.eps = bounds
            if batch_size is None:
                self.nbatch = 1
            else:
                self.nbatch = batch_size
        else:  # GPMin without updates
            if scale is None:
                scale = 0.4
            if noise is None:
                noise = 0.001
            if weight is None:
                weight = 1.
            if bounds is not None:
                warning = ('The parameter bounds is of no use '
                           'if update_hyperparams is False. '
                           'The value provided by the user '
                           'is being ignored.')
                warnings.warn(warning, UserWarning)
            if batch_size is not None:
                warning = ('The parameter batch_size is of no use '
                           'if update_hyperparams is False. '
                           'The value provided by the user '
                           'is being ignored.')
                warnings.warn(warning, UserWarning)

            # Set the variables to something anyways
            self.eps = False
            self.nbatch = None

        self.strategy = update_prior_strategy
        self.update_hp = update_hyperparams
        self.function_calls = 1
        self.force_calls = 0
        self.x_list = []  # Training set features
        self.y_list = []  # Training set targets

        Optimizer.__init__(self, atoms=atoms, restart=restart, logfile=logfile,
                           trajectory=trajectory, master=master, comm=comm,
                           force_consistent=force_consistent)
        if prior is None:
            self.update_prior = True
            prior = ConstantPrior(constant=None)
        else:
            self.update_prior = False

        if kernel is None:
            kernel = SquaredExponential()
        GaussianProcess.__init__(self, prior, kernel)
        self.set_hyperparams(np.array([weight, scale, noise]))

204 

205 def acquisition(self, r): 

206 e = self.predict(r) 

207 return e[0], e[1:] 

208 

209 def update(self, r, e, f): 

210 """Update the PES 

211 

212 Update the training set, the prior and the hyperparameters. 

213 Finally, train the model 

214 """ 

215 # update the training set 

216 self.x_list.append(r) 

217 f = f.reshape(-1) 

218 y = np.append(np.array(e).reshape(-1), -f) 

219 self.y_list.append(y) 

220 

221 # Set/update the constant for the prior 

222 if self.update_prior: 

223 if self.strategy == 'average': 

224 av_e = np.mean(np.array(self.y_list)[:, 0]) 

225 self.prior.set_constant(av_e) 

226 elif self.strategy == 'maximum': 

227 max_e = np.max(np.array(self.y_list)[:, 0]) 

228 self.prior.set_constant(max_e) 

229 elif self.strategy == 'init': 

230 self.prior.set_constant(e) 

231 self.update_prior = False 

232 

233 # update hyperparams 

234 if (self.update_hp and self.function_calls % self.nbatch == 0 and 

235 self.function_calls != 0): 

236 self.fit_to_batch() 

237 

238 # build the model 

239 self.train(np.array(self.x_list), np.array(self.y_list)) 

240 

241 def relax_model(self, r0): 

242 result = minimize(self.acquisition, r0, method='L-BFGS-B', jac=True) 

243 if result.success: 

244 return result.x 

245 else: 

246 self.dump() 

247 raise RuntimeError("The minimization of the acquisition function " 

248 "has not converged") 

249 

250 def fit_to_batch(self): 

251 """Fit hyperparameters keeping the ratio noise/weight fixed""" 

252 ratio = self.noise / self.kernel.weight 

253 self.fit_hyperparameters(np.array(self.x_list), 

254 np.array(self.y_list), eps=self.eps) 

255 self.noise = ratio * self.kernel.weight 

256 

    def step(self, f=None):
        """Take one GPMin step.

        Adds the current point to the training set, moves the atoms to
        the minimum of the surrogate PES, and — if the true energy did
        not decrease — keeps feeding the failed points back into the
        model until a descent step is found (or 30 attempts fail).
        """
        optimizable = self.optimizable
        if f is None:
            f = optimizable.get_forces()

        # Current (flattened) positions, energy and forces become a
        # new training point for the surrogate model.
        r0 = optimizable.get_positions().reshape(-1)
        e0 = optimizable.get_potential_energy()
        self.update(r0, e0, f)

        # Jump to the minimum of the surrogate PES and evaluate there.
        r1 = self.relax_model(r0)
        optimizable.set_positions(r1.reshape(-1, 3))
        e1 = optimizable.get_potential_energy()
        f1 = optimizable.get_forces()
        self.function_calls += 1
        self.force_calls += 1
        count = 0
        # The model may overshoot: if the true energy went up, train on
        # the failed point and re-minimize, always restarting from r0.
        while e1 >= e0:
            self.update(r1, e1, f1)
            r1 = self.relax_model(r0)

            optimizable.set_positions(r1.reshape(-1, 3))
            e1 = optimizable.get_potential_energy()
            f1 = optimizable.get_forces()
            self.function_calls += 1
            self.force_calls += 1
            # Accept the point anyway if forces already satisfy the
            # convergence criterion.
            if self.converged(f1):
                break

            count += 1
            if count == 30:
                raise RuntimeError("A descent model could not be built")
        # Persist the (possibly grown) training set for restarts.
        self.dump()

289 

290 def dump(self): 

291 """Save the training set""" 

292 if world.rank == 0 and self.restart is not None: 

293 with open(self.restart, 'wb') as fd: 

294 write_json(fd, (self.x_list, self.y_list)) 

295 

296 def read(self): 

297 self.x_list, self.y_list = self.load()