Coverage for /builds/hweiske/ase/ase/calculators/castep.py: 49.26%

1088 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-22 11:22 +0000

1"""This module defines an interface to CASTEP for 

2 use by the ASE (Webpage: http://wiki.fysik.dtu.dk/ase) 

3 

4Authors: 

5 Max Hoffmann, max.hoffmann@ch.tum.de 

6 Joerg Meyer, joerg.meyer@ch.tum.de 

7 Simon P. Rittmeyer, simon.rittmeyer@tum.de 

8 

9Contributors: 

10 Juan M. Lorenzi, juan.lorenzi@tum.de 

11 Georg S. Michelitsch, georg.michelitsch@tch.tum.de 

12 Reinhard J. Maurer, reinhard.maurer@yale.edu 

13 Simone Sturniolo, simone.sturniolo@stfc.ac.uk 

14""" 

15 

16import difflib 

17import glob 

18import io 

19import json 

20import os 

21import re 

22import shutil 

23import subprocess 

24import sys 

25import tempfile 

26import time 

27import warnings 

28from collections import defaultdict, namedtuple 

29from copy import deepcopy 

30from itertools import product 

31from pathlib import Path 

32from typing import Any, Dict, List, Optional 

33 

34import numpy as np 

35 

36from ase import Atoms, units 

37from ase.calculators.calculator import (BaseCalculator, compare_atoms, 

38 kpts2sizeandoffsets) 

39from ase.config import cfg 

40from ase.constraints import FixAtoms, FixCartesian, FixConstraint 

41from ase.dft.kpoints import BandPath 

42from ase.io.castep import read_bands, read_param 

43from ase.io.castep.castep_input_file import CastepCell, CastepParam 

44from ase.parallel import paropen 

45 

46__all__ = [ 

47 'Castep', 

48 'CastepCell', 

49 'CastepParam', 

50 'create_castep_keywords'] 

51 

52# A convenient table to avoid the previously used "eval" 

53_tf_table = { 

54 '': True, # Just the keyword is equivalent to True 

55 'True': True, 

56 'False': False} 

57 

58 

59def _self_getter(getf): 

60 # A decorator that makes it so that if no 'atoms' argument is passed to a 

61 # getter function, self.atoms is used instead 

62 

63 def decor_getf(self, atoms=None, *args, **kwargs): 

64 

65 if atoms is None: 

66 atoms = self.atoms 

67 

68 return getf(self, atoms, *args, **kwargs) 

69 

70 return decor_getf 

71 

72 

73class Castep(BaseCalculator): 

74 r""" 

75CASTEP Interface Documentation 

76 

77 

78Introduction 

79============ 

80 

81CASTEP_ [1]_ W_ is a software package which uses density functional theory to 

82provide a good atomic-level description of all manner of materials and 

83molecules. CASTEP can give information about total energies, forces and 

84stresses on an atomic system, as well as calculating optimum geometries, band 

85structures, optical spectra, phonon spectra and much more. It can also perform 

86molecular dynamics simulations. 

87 

88The CASTEP calculator interface class offers intuitive access to all CASTEP 

89settings and most results. All CASTEP specific settings are accessible via 

90attribute access (*i.e*. ``calc.param.keyword = ...`` or 

91``calc.cell.keyword = ...``) 

92 

93 

94Getting Started: 

95================ 

96 

97Set the environment variables appropriately for your system:: 

98 

99 export CASTEP_COMMAND=' ... ' 

100 export CASTEP_PP_PATH=' ... ' 

101 

102Note: alternatively to CASTEP_PP_PATH one can set PSPOT_DIR 

103as CASTEP consults this by default, i.e.:: 

104 

105 export PSPOT_DIR=' ... ' 

106 

107 

108Running the Calculator 

109====================== 

110 

111The default initialization command for the CASTEP calculator is 

112 

113.. class:: Castep(directory='CASTEP', label='castep') 

114 

115To do a minimal run one only needs to set atoms, this will use all 

116default settings of CASTEP, meaning LDA, singlepoint, etc.. 

117 

118With a generated *castep_keywords.json* in place all options are accessible 

119by inspection, *i.e.* tab-completion. This works best when using ``ipython``. 

120All options can be accessed via ``calc.param.<TAB>`` or ``calc.cell.<TAB>`` 

121and documentation is printed with ``calc.param.<keyword> ?`` or 

122``calc.cell.<keyword> ?``. All options can also be set directly 

123using ``calc.keyword = ...`` or ``calc.KEYWORD = ...`` or even 

124``ialc.KeYwOrD`` or directly as named arguments in the call to the constructor 

125(*e.g.* ``Castep(task='GeometryOptimization')``). 

126If using this calculator on a machine without CASTEP, one might choose to copy 

127a *castep_keywords.json* file generated elsewhere in order to access this 

128feature: the file will be used if located in the working directory, 

129*$HOME/.ase/* or *ase/ase/calculators/* within the ASE library. The file should 

130be generated the first time it is needed, but you can generate a new keywords 

131file in the currect directory with ``python -m ase.calculators.castep``. 

132 

133All options that go into the ``.param`` file are held in an ``CastepParam`` 

134instance, while all options that go into the ``.cell`` file and don't belong 

135to the atoms object are held in an ``CastepCell`` instance. Each instance can 

136be created individually and can be added to calculators by attribute 

137assignment, *i.e.* ``calc.param = param`` or ``calc.cell = cell``. 

138 

139All internal variables of the calculator start with an underscore (_). 

140All cell attributes that clearly belong into the atoms object are blocked. 

141Setting ``calc.atoms_attribute`` (*e.g.* ``= positions``) is sent directly to 

142the atoms object. 

143 

144 

145Arguments: 

146========== 

147 

148========================= ==================================================== 

149Keyword Description 

150========================= ==================================================== 

151``directory`` The relative path where all input and output files 

152 will be placed. If this does not exist, it will be 

153 created. Existing directories will be moved to 

154 directory-TIMESTAMP unless self._rename_existing_dir 

155 is set to false. 

156 

157``label`` The prefix of .param, .cell, .castep, etc. files. 

158 

159``castep_command`` Command to run castep. Can also be set via the bash 

160 environment variable ``CASTEP_COMMAND``. If none is 

161 given or found, will default to ``castep`` 

162 

163``check_castep_version`` Boolean whether to check if the installed castep 

164 version matches the version from which the available 

165 options were deduced. Defaults to ``False``. 

166 

167``castep_pp_path`` The path where the pseudopotentials are stored. Can 

168 also be set via the bash environment variables 

169 ``PSPOT_DIR`` (preferred) and ``CASTEP_PP_PATH``. 

170 Will default to the current working directory if 

171 none is given or found. Note that pseudopotentials 

172 may be generated on-the-fly if they are not found. 

173 

174``find_pspots`` Boolean whether to search for pseudopotentials in 

175 ``<castep_pp_path>`` or not. If activated, files in 

176 this directory will be checked for typical names. If 

177 files are not found, they will be generated on the 

178 fly, depending on the ``_build_missing_pspots`` 

179 value. A RuntimeError will be raised in case 

180 multiple files per element are found. Defaults to 

181 ``False``. 

182``keyword_tolerance`` Integer to indicate the level of tolerance to apply 

183 validation of any parameters set in the CastepCell 

184 or CastepParam objects against the ones found in 

185 castep_keywords. Levels are as following: 

186 

187 0 = no tolerance, keywords not found in 

188 castep_keywords will raise an exception 

189 

190 1 = keywords not found will be accepted but produce 

191 a warning (default) 

192 

193 2 = keywords not found will be accepted silently 

194 

195 3 = no attempt is made to look for 

196 castep_keywords.json at all 

197``castep_keywords`` Can be used to pass a CastepKeywords object that is 

198 then used with no attempt to actually load a 

199 castep_keywords.json file. Most useful for debugging 

200 and testing purposes. 

201 

202========================= ==================================================== 

203 

204 

205Additional Settings 

206=================== 

207 

208========================= ==================================================== 

209Internal Setting Description 

210========================= ==================================================== 

211``_castep_command`` (``=castep``): the actual shell command used to 

212 call CASTEP. 

213 

214``_check_checkfile`` (``=True``): this makes write_param() only 

215 write a continue or reuse statement if the 

216 addressed .check or .castep_bin file exists in the 

217 directory. 

218 

219``_copy_pspots`` (``=False``): if set to True the calculator will 

220 actually copy the needed pseudo-potential (\*.usp) 

221 file, usually it will only create symlinks. 

222 

223``_link_pspots`` (``=True``): if set to True the calculator will 

224 actually will create symlinks to the needed pseudo 

225 potentials. Set this option (and ``_copy_pspots``) 

226 to False if you rather want to access your pseudo 

227 potentials using the PSPOT_DIR environment variable 

228 that is read by CASTEP. 

229 *Note:* This option has no effect if ``copy_pspots`` 

230 is True.. 

231 

232``_build_missing_pspots`` (``=True``): if set to True, castep will generate 

233 missing pseudopotentials on the fly. If not, a 

234 RuntimeError will be raised if not all files were 

235 found. 

236 

237``_export_settings`` (``=True``): if this is set to 

238 True, all calculator internal settings shown here 

239 will be included in the .param in a comment line (#) 

240 and can be read again by merge_param. merge_param 

241 can be forced to ignore this directive using the 

242 optional argument ``ignore_internal_keys=True``. 

243 

244``_force_write`` (``=True``): this controls wether the \*cell and 

245 \*param will be overwritten. 

246 

247``_prepare_input_only`` (``=False``): If set to True, the calculator will 

248 create \*cell und \*param file but not 

249 start the calculation itself. 

250 If this is used to prepare jobs locally 

251 and run on a remote cluster it is recommended 

252 to set ``_copy_pspots = True``. 

253 

254``_castep_pp_path`` (``='.'``) : the place where the calculator 

255 will look for pseudo-potential files. 

256 

257``_find_pspots`` (``=False``): if set to True, the calculator will 

258 try to find the respective pseudopotentials from 

259 <_castep_pp_path>. As long as there are no multiple 

260 files per element in this directory, the auto-detect 

261 feature should be very robust. Raises a RuntimeError 

262 if required files are not unique (multiple files per 

263 element). Non existing pseudopotentials will be 

264 generated, though this could be dangerous. 

265 

266``_rename_existing_dir`` (``=True``) : when using a new instance 

267 of the calculator, this will move directories out of 

268 the way that would be overwritten otherwise, 

269 appending a date string. 

270 

271``_set_atoms`` (``=False``) : setting this to True will overwrite 

272 any atoms object previously attached to the 

273 calculator when reading a \.castep file. By de- 

274 fault, the read() function will only create a new 

275 atoms object if none has been attached and other- 

276 wise try to assign forces etc. based on the atom's 

277 positions. ``_set_atoms=True`` could be necessary 

278 if one uses CASTEP's internal geometry optimization 

279 (``calc.param.task='GeometryOptimization'``) 

280 because then the positions get out of sync. 

281 *Warning*: this option is generally not recommended 

282 unless one knows one really needs it. There should 

283 never be any need, if CASTEP is used as a 

284 single-point calculator. 

285 

286``_track_output`` (``=False``) : if set to true, the interface 

287 will append a number to the label on all input 

288 and output files, where n is the number of calls 

289 to this instance. *Warning*: this setting may con- 

290 sume a lot more disk space because of the additio- 

291 nal \*check files. 

292 

293``_try_reuse`` (``=_track_output``) : when setting this, the in- 

294 terface will try to fetch the reuse file from the 

295 previous run even if _track_output is True. By de- 

296 fault it is equal to _track_output, but may be 

297 overridden. 

298 

299 Since this behavior may not always be desirable for 

300 single-point calculations. Regular reuse for *e.g.* 

301 a geometry-optimization can be achieved by setting 

302 ``calc.param.reuse = True``. 

303 

304``_pedantic`` (``=False``) if set to true, the calculator will 

305 inform about settings probably wasting a lot of CPU 

306 time or causing numerical inconsistencies. 

307 

308========================= ==================================================== 

309 

310Special features: 

311================= 

312 

313 

314``.dryrun_ok()`` 

315 Runs ``castep_command seed -dryrun`` in a temporary directory return True if 

316 all variables initialized ok. This is a fast way to catch errors in the 

317 input. Afterwards _kpoints_used is set. 

318 

319``.merge_param()`` 

320 Takes a filename or filehandler of a .param file or CastepParam instance and 

321 merges it into the current calculator instance, overwriting current settings 

322 

323``.keyword.clear()`` 

324 Can be used on any option like ``calc.param.keyword.clear()`` or 

325 ``calc.cell.keyword.clear()`` to return to the CASTEP default. 

326 

327``.initialize()`` 

328 Creates all needed input in the ``_directory``. This can then copied to and 

329 run in a place without ASE or even python. 

330 

331``.set_pspot('<library>')`` 

332 This automatically sets the pseudo-potential for all present species to 

333 ``<Species>_<library>.usp``. Make sure that ``_castep_pp_path`` is set 

334 correctly. Note that there is no check, if the file actually exists. If it 

335 doesn't castep will crash! You may want to use ``find_pspots()`` instead. 

336 

337``.find_pspots(pspot=<library>, suffix=<suffix>)`` 

338 This automatically searches for pseudopotentials of type 

339 ``<Species>_<library>.<suffix>`` or ``<Species>-<library>.<suffix>`` in 

340 ``castep_pp_path` (make sure this is set correctly). Note that ``<Species>`` 

341 will be searched for case insensitive. Regular expressions are accepted, and 

342 arguments ``'*'`` will be regarded as bash-like wildcards. Defaults are any 

343 ``<library>`` and any ``<suffix>`` from ``['usp', 'UPF', 'recpot']``. If you 

344 have well-organized folders with pseudopotentials of one kind, this should 

345 work with the defaults. 

346 

347``print(calc)`` 

348 Prints a short summary of the calculator settings and atoms. 

349 

350``ase.io.castep.read_seed('path-to/seed')`` 

351 Given you have a combination of seed.{param,cell,castep} this will return an 

352 atoms object with the last ionic positions in the .castep file and all other 

353 settings parsed from the .cell and .param file. If no .castep file is found 

354 the positions are taken from the .cell file. The output directory will be 

355 set to the same directory, only the label is preceded by 'copy_of\_' to 

356 avoid overwriting. 

357 

358``.set_kpts(kpoints)`` 

359 This is equivalent to initialising the calculator with 

360 ``calc = Castep(kpts=kpoints)``. ``kpoints`` can be specified in many 

361 convenient forms: simple Monkhorst-Pack grids can be specified e.g. 

362 ``(2, 2, 3)`` or ``'2 2 3'``; lists of specific weighted k-points can be 

363 given in reciprocal lattice coordinates e.g. 

364 ``[[0, 0, 0, 0.25], [0.25, 0.25, 0.25, 0.75]]``; a dictionary syntax is 

365 available for more complex requirements e.g. 

366 ``{'size': (2, 2, 2), 'gamma': True}`` will give a Gamma-centered 2x2x2 M-P 

367 grid, ``{'density': 10, 'gamma': False, 'even': False}`` will give a mesh 

368 with density of at least 10 Ang (based on the unit cell of currently-attached 

369 atoms) with an odd number of points in each direction and avoiding the Gamma 

370 point. 

371 

372``.set_bandpath(bandpath)`` 

373 This is equivalent to initialialising the calculator with 

374 ``calc=Castep(bandpath=bandpath)`` and may be set simultaneously with *kpts*. 

375 It allows an electronic band structure path to be set up using ASE BandPath 

376 objects. This enables a band structure calculation to be set up conveniently 

377 using e.g. calc.set_bandpath(atoms.cell.bandpath().interpolate(npoints=200)) 

378 

379``.band_structure(bandfile=None)`` 

380 Read a band structure from _seedname.bands_ file. This returns an ase 

381 BandStructure object which may be plotted with e.g. 

382 ``calc.band_structure().plot()`` 

383 

384Notes/Issues: 

385============== 

386 

387* Currently *only* the FixAtoms *constraint* is fully supported for reading and 

388 writing. There is some experimental support for the FixCartesian constraint. 

389 

390* There is no support for the CASTEP *unit system*. Units of eV and Angstrom 

391 are used throughout. In particular when converting total energies from 

392 different calculators, one should check that the same CODATA_ version is 

393 used for constants and conversion factors, respectively. 

394 

395.. _CASTEP: http://www.castep.org/ 

396 

397.. _W: https://en.wikipedia.org/wiki/CASTEP 

398 

399.. _CODATA: https://physics.nist.gov/cuu/Constants/index.html 

400 

401.. [1] S. J. Clark, M. D. Segall, C. J. Pickard, P. J. Hasnip, M. J. Probert, 

402 K. Refson, M. C. Payne Zeitschrift für Kristallographie 220(5-6) 

403 pp.567- 570 (2005) PDF_. 

404 

405.. _PDF: http://www.tcm.phy.cam.ac.uk/castep/papers/ZKristallogr_2005.pdf 

406 

407 

408End CASTEP Interface Documentation 

409 """ 

410 

411 # Class attributes ! 

412 # keys set through atoms object 

413 atoms_keys = [ 

414 'charges', 

415 'ionic_constraints', 

416 'lattice_abs', 

417 'lattice_cart', 

418 'positions_abs', 

419 'positions_abs_final', 

420 'positions_abs_intermediate', 

421 'positions_frac', 

422 'positions_frac_final', 

423 'positions_frac_intermediate'] 

424 

425 atoms_obj_keys = [ 

426 'dipole', 

427 'energy_free', 

428 'energy_zero', 

429 'fermi', 

430 'forces', 

431 'nbands', 

432 'positions', 

433 'stress', 

434 'pressure'] 

435 

436 internal_keys = [ 

437 '_castep_command', 

438 '_check_checkfile', 

439 '_copy_pspots', 

440 '_link_pspots', 

441 '_find_pspots', 

442 '_build_missing_pspots', 

443 '_directory', 

444 '_export_settings', 

445 '_force_write', 

446 '_label', 

447 '_prepare_input_only', 

448 '_castep_pp_path', 

449 '_rename_existing_dir', 

450 '_set_atoms', 

451 '_track_output', 

452 '_try_reuse', 

453 '_pedantic'] 

454 

455 implemented_properties = [ 

456 'energy', 

457 'free_energy', 

458 'forces', 

459 'stress', 

460 'charges', 

461 'magmoms', 

462 ] 

463 

464 # specific to this calculator 

465 implemented_properties += [ 

466 'energy_without_dispersion_correction', 

467 'free_energy_without_dispersion_correction', 

468 'energy_zero_without_dispersion_correction', 

469 'energy_with_dispersion_correction', 

470 'free_energy_with_dispersion_correction', 

471 'energy_zero_with_dispersion_correction', 

472 'energy_with_finite_basis_set_correction', 

473 'pressure', 

474 'hirshfeld_volume_ratios', 

475 'hirshfeld_charges', 

476 'hirshfeld_magmoms', 

477 ] 

478 

479 def __init__(self, directory='CASTEP', label='castep', 

480 castep_command=None, check_castep_version=False, 

481 castep_pp_path=None, find_pspots=False, keyword_tolerance=1, 

482 castep_keywords=None, **kwargs): 

483 

484 self.results = {} 

485 

486 from ase.io.castep import write_cell 

487 self._write_cell = write_cell 

488 

489 if castep_keywords is None: 

490 castep_keywords = CastepKeywords(make_param_dict(), 

491 make_cell_dict(), 

492 [], 

493 [], 

494 0) 

495 if keyword_tolerance < 3: 

496 try: 

497 castep_keywords = import_castep_keywords(castep_command) 

498 except CastepVersionError as e: 

499 if keyword_tolerance == 0: 

500 raise e 

501 else: 

502 warnings.warn(str(e)) 

503 

504 self._kw_tol = keyword_tolerance 

505 keyword_tolerance = max(keyword_tolerance, 2) # 3 not accepted below 

506 self.param = CastepParam(castep_keywords, 

507 keyword_tolerance=keyword_tolerance) 

508 self.cell = CastepCell(castep_keywords, 

509 keyword_tolerance=keyword_tolerance) 

510 

511 ################################### 

512 # Calculator state variables # 

513 ################################### 

514 self._calls = 0 

515 self._castep_version = castep_keywords.castep_version 

516 

517 # collects warning from .castep files 

518 self._warnings = [] 

519 # collects content from *.err file 

520 self._error = None 

521 # warnings raised by the ASE interface 

522 self._interface_warnings = [] 

523 

524 # store to check if recalculation is necessary 

525 self._old_atoms = None 

526 self._old_cell = None 

527 self._old_param = None 

528 

529 ################################### 

530 # Internal keys # 

531 # Allow to tweak the behavior # 

532 ################################### 

533 self._opt = {} 

534 self._castep_command = get_castep_command(castep_command) 

535 self._castep_pp_path = get_castep_pp_path(castep_pp_path) 

536 self._check_checkfile = True 

537 self._copy_pspots = False 

538 self._link_pspots = True 

539 self._find_pspots = find_pspots 

540 self._build_missing_pspots = True 

541 self._directory = os.path.abspath(directory) 

542 self._export_settings = True 

543 self._force_write = True 

544 self._label = label 

545 self._prepare_input_only = False 

546 self._rename_existing_dir = True 

547 self._set_atoms = False 

548 self._track_output = False 

549 self._try_reuse = False 

550 

551 # turn off the pedantic user warnings 

552 self._pedantic = False 

553 

554 # will be set on during runtime 

555 self._seed = None 

556 

557 ################################### 

558 # (Physical) result variables # 

559 ################################### 

560 self.atoms = None 

561 # initialize result variables 

562 self._eigenvalues = None 

563 self._efermi = None 

564 self._ibz_kpts = None 

565 self._ibz_weights = None 

566 self._band_structure = None 

567 

568 self._number_of_cell_constraints = None 

569 self._output_verbosity = None 

570 self._unit_cell = None 

571 self._kpoints = None 

572 

573 # pointers to other files used at runtime 

574 self._check_file = None 

575 self._castep_bin_file = None 

576 

577 # plane wave cutoff energy (may be derived during PP generation) 

578 self._cut_off_energy = None 

579 

580 # runtime information 

581 self._total_time = None 

582 self._peak_memory = None 

583 

584 # check version of CASTEP options module against current one 

585 if check_castep_version: 

586 local_castep_version = get_castep_version(self._castep_command) 

587 if not hasattr(self, '_castep_version'): 

588 warnings.warn('No castep version found') 

589 return 

590 if local_castep_version != self._castep_version: 

591 warnings.warn( 

592 'The options module was generated from version %s ' 

593 'while your are currently using CASTEP version %s' % 

594 (self._castep_version, 

595 get_castep_version(self._castep_command))) 

596 self._castep_version = local_castep_version 

597 

598 # processes optional arguments in kw style 

599 for keyword, value in kwargs.items(): 

600 # first fetch special keywords issued by ASE CLI 

601 if keyword == 'kpts': 

602 self.set_kpts(value) 

603 elif keyword == 'bandpath': 

604 self.set_bandpath(value) 

605 elif keyword == 'xc': 

606 self.xc_functional = value 

607 elif keyword == 'ecut': 

608 self.cut_off_energy = value 

609 else: # the general case 

610 self.__setattr__(keyword, value) 

611 

612 # TODO: to be self.use_cache = True after revising `__setattr__` 

613 self.__dict__['use_cache'] = True 

614 

615 def set_atoms(self, atoms): 

616 self.atoms = atoms 

617 

618 def get_atoms(self): 

619 if self.atoms is None: 

620 raise ValueError('Calculator has no atoms') 

621 atoms = self.atoms.copy() 

622 atoms.calc = self 

623 return atoms 

624 

625 def _get_name(self) -> str: 

626 return self.__class__.__name__ 

627 

628 def band_structure(self, bandfile=None): 

629 from ase.spectrum.band_structure import BandStructure 

630 

631 if bandfile is None: 

632 bandfile = os.path.join(self._directory, self._seed) + '.bands' 

633 

634 if not os.path.exists(bandfile): 

635 raise ValueError(f'Cannot find band file "{bandfile}".') 

636 

637 kpts, weights, eigenvalues, efermi = read_bands(bandfile) 

638 

639 # Get definitions of high-symmetry points 

640 special_points = self.atoms.cell.bandpath(npoints=0).special_points 

641 bandpath = BandPath(self.atoms.cell, 

642 kpts=kpts, 

643 special_points=special_points) 

644 return BandStructure(bandpath, eigenvalues, reference=efermi) 

645 

646 def set_bandpath(self, bandpath): 

647 """Set a band structure path from ase.dft.kpoints.BandPath object 

648 

649 This will set the bs_kpoint_list block with a set of specific points 

650 determined in ASE. bs_kpoint_spacing will not be used; to modify the 

651 number of points, consider using e.g. bandpath.resample(density=20) to 

652 obtain a new dense path. 

653 

654 Args: 

655 bandpath (:obj:`ase.dft.kpoints.BandPath` or None): 

656 Set to None to remove list of band structure points. Otherwise, 

657 sampling will follow BandPath parameters. 

658 

659 """ 

660 

661 def clear_bs_keywords(): 

662 bs_keywords = product({'bs_kpoint', 'bs_kpoints'}, 

663 {'path', 'path_spacing', 

664 'list', 

665 'mp_grid', 'mp_spacing', 'mp_offset'}) 

666 for bs_tag in bs_keywords: 

667 setattr(self.cell, '_'.join(bs_tag), None) 

668 

669 if bandpath is None: 

670 clear_bs_keywords() 

671 elif isinstance(bandpath, BandPath): 

672 clear_bs_keywords() 

673 self.cell.bs_kpoint_list = [' '.join(map(str, row)) 

674 for row in bandpath.kpts] 

675 else: 

676 raise TypeError('Band structure path must be an ' 

677 'ase.dft.kpoint.BandPath object') 

678 

679 def set_kpts(self, kpts): 

680 """Set k-point mesh/path using a str, tuple or ASE features 

681 

682 Args: 

683 kpts (None, tuple, str, dict): 

684 

685 This method will set the CASTEP parameters kpoints_mp_grid, 

686 kpoints_mp_offset and kpoints_mp_spacing as appropriate. Unused 

687 parameters will be set to None (i.e. not included in input files.) 

688 

689 If kpts=None, all these parameters are set as unused. 

690 

691 The simplest useful case is to give a 3-tuple of integers specifying 

692 a Monkhorst-Pack grid. This may also be formatted as a string separated 

693 by spaces; this is the format used internally before writing to the 

694 input files. 

695 

696 A more powerful set of features is available when using a python 

697 dictionary with the following allowed keys: 

698 

699 - 'size' (3-tuple of int) mesh of mesh dimensions 

700 - 'density' (float) for BZ sampling density in points per recip. Ang 

701 ( kpoint_mp_spacing = 1 / (2pi * density) ). An explicit MP mesh will 

702 be set to allow for rounding/centering. 

703 - 'spacing' (float) for BZ sampling density for maximum space between 

704 sample points in reciprocal space. This is numerically equivalent to 

705 the inbuilt ``calc.cell.kpoint_mp_spacing``, but will be converted to 

706 'density' to allow for rounding/centering. 

707 - 'even' (bool) to round each direction up to the nearest even number; 

708 set False for odd numbers, leave as None for no odd/even rounding. 

709 - 'gamma' (bool) to offset the Monkhorst-Pack grid to include 

710 (0, 0, 0); set False to offset each direction avoiding 0. 

711 """ 

712 

713 def clear_mp_keywords(): 

714 mp_keywords = product({'kpoint', 'kpoints'}, 

715 {'mp_grid', 'mp_offset', 

716 'mp_spacing', 'list'}) 

717 for kp_tag in mp_keywords: 

718 setattr(self.cell, '_'.join(kp_tag), None) 

719 

720 # Case 1: Clear parameters with set_kpts(None) 

721 if kpts is None: 

722 clear_mp_keywords() 

723 

724 # Case 2: list of explicit k-points with weights 

725 # e.g. [[ 0, 0, 0, 0.125], 

726 # [ 0, -0.5, 0, 0.375], 

727 # [-0.5, 0, -0.5, 0.375], 

728 # [-0.5, -0.5, -0.5, 0.125]] 

729 

730 elif (isinstance(kpts, (tuple, list)) 

731 and isinstance(kpts[0], (tuple, list))): 

732 

733 if not all(map((lambda row: len(row) == 4), kpts)): 

734 raise ValueError( 

735 'In explicit kpt list each row should have 4 elements') 

736 

737 clear_mp_keywords() 

738 self.cell.kpoint_list = [' '.join(map(str, row)) for row in kpts] 

739 

740 # Case 3: list of explicit kpts formatted as list of str 

741 # i.e. the internal format of calc.kpoint_list split on \n 

742 # e.g. ['0 0 0 0.125', '0 -0.5 0 0.375', '-0.5 0 -0.5 0.375'] 

743 elif isinstance(kpts, (tuple, list)) and isinstance(kpts[0], str): 

744 

745 if not all(map((lambda row: len(row.split()) == 4), kpts)): 

746 raise ValueError( 

747 'In explicit kpt list each row should have 4 elements') 

748 

749 clear_mp_keywords() 

750 self.cell.kpoint_list = kpts 

751 

752 # Case 4: list or tuple of MP samples e.g. [3, 3, 2] 

753 elif isinstance(kpts, (tuple, list)) and isinstance(kpts[0], int): 

754 if len(kpts) != 3: 

755 raise ValueError('Monkhorst-pack grid should have 3 values') 

756 clear_mp_keywords() 

757 self.cell.kpoint_mp_grid = '%d %d %d' % tuple(kpts) 

758 

759 # Case 5: str representation of Case 3 e.g. '3 3 2' 

760 elif isinstance(kpts, str): 

761 self.set_kpts([int(x) for x in kpts.split()]) 

762 

763 # Case 6: dict of options e.g. {'size': (3, 3, 2), 'gamma': True} 

764 # 'spacing' is allowed but transformed to 'density' to get mesh/offset 

765 elif isinstance(kpts, dict): 

766 kpts = kpts.copy() 

767 

768 if (kpts.get('spacing') is not None 

769 and kpts.get('density') is not None): 

770 raise ValueError( 

771 'Cannot set kpts spacing and density simultaneously.') 

772 else: 

773 if kpts.get('spacing') is not None: 

774 kpts = kpts.copy() 

775 spacing = kpts.pop('spacing') 

776 kpts['density'] = 1 / (2 * np.pi * spacing) 

777 

778 clear_mp_keywords() 

779 size, offsets = kpts2sizeandoffsets(atoms=self.atoms, **kpts) 

780 self.cell.kpoint_mp_grid = '%d %d %d' % tuple(size) 

781 self.cell.kpoint_mp_offset = '%f %f %f' % tuple(offsets) 

782 

783 # Case 7: some other iterator. Try treating as a list: 

784 elif hasattr(kpts, '__iter__'): 

785 self.set_kpts(list(kpts)) 

786 

787 # Otherwise, give up 

788 else: 

789 raise TypeError('Cannot interpret kpts of this type') 

790 

791 def todict(self, skip_default=True): 

792 """Create dict with settings of .param and .cell""" 

793 dct = {} 

794 dct['param'] = self.param.get_attr_dict() 

795 dct['cell'] = self.cell.get_attr_dict() 

796 

797 return dct 

798 

799 def check_state(self, atoms, tol=1e-15): 

800 """Check for system changes since last calculation.""" 

801 return compare_atoms(self._old_atoms, atoms) 

802 

803 def _castep_find_last_record(self, castep_file): 

804 """Checks wether a given castep file has a regular 

805 ending message following the last banner message. If this 

806 is the case, the line number of the last banner is message 

807 is return, otherwise False. 

808 

809 returns (record_start, record_end, end_found, last_record_complete) 

810 """ 

811 if isinstance(castep_file, str): 

812 castep_file = paropen(castep_file, 'r') 

813 file_opened = True 

814 else: 

815 file_opened = False 

816 record_starts = [] 

817 while True: 

818 line = castep_file.readline() 

819 if (('Welcome' in line or 'Materials Studio' in line) 

820 and 'CASTEP' in line): 

821 record_starts = [castep_file.tell()] + record_starts 

822 if not line: 

823 break 

824 

825 if record_starts == []: 

826 warnings.warn('Could not find CASTEP label in result file: %s.' 

827 ' Are you sure this is a .castep file?' % castep_file) 

828 return None 

829 

830 # search for regular end of file 

831 end_found = False 

832 # start to search from record beginning from the back 

833 # and see if 

834 record_end = -1 

835 for record_nr, record_start in enumerate(record_starts): 

836 castep_file.seek(record_start) 

837 while True: 

838 line = castep_file.readline() 

839 if not line: 

840 break 

841 if 'warn' in line.lower(): 

842 self._warnings.append(line) 

843 

844 if 'Finalisation time =' in line: 

845 end_found = True 

846 record_end = castep_file.tell() 

847 break 

848 

849 if end_found: 

850 break 

851 

852 if file_opened: 

853 castep_file.close() 

854 

855 if end_found: 

856 # record_nr == 0 corresponds to the last record here 

857 if record_nr == 0: 

858 return (record_start, record_end, True, True) 

859 else: 

860 return (record_start, record_end, True, False) 

861 else: 

862 return (0, record_end, False, False) 

863 

864 def read(self, castep_file=None): 

865 """Read a castep file into the current instance.""" 

866 

867 _close = True 

868 

869 if castep_file is None: 

870 if self._castep_file: 

871 castep_file = self._castep_file 

872 out = paropen(castep_file, 'r') 

873 else: 

874 warnings.warn('No CASTEP file specified') 

875 return 

876 if not os.path.exists(castep_file): 

877 warnings.warn('No CASTEP file found') 

878 

879 elif isinstance(castep_file, str): 

880 out = paropen(castep_file, 'r') 

881 

882 else: 

883 # in this case we assume that we have a fileobj already, but check 

884 # for attributes in order to avoid extended EAFP blocks. 

885 out = castep_file 

886 

887 # look before you leap... 

888 attributes = ['name', 

889 'seek', 

890 'close', 

891 'readline', 

892 'tell'] 

893 

894 for attr in attributes: 

895 if not hasattr(out, attr): 

896 raise TypeError( 

897 '"castep_file" is neither str nor valid fileobj') 

898 

899 castep_file = out.name 

900 _close = False 

901 

902 if self._seed is None: 

903 self._seed = os.path.splitext(os.path.basename(castep_file))[0] 

904 

905 err_file = f'{self._seed}.0001.err' 

906 if os.path.exists(err_file): 

907 err_file = paropen(err_file) 

908 self._error = err_file.read() 

909 err_file.close() 

910 # we return right-away because it might 

911 # just be here from a previous run 

912 # look for last result, if several CASTEP 

913 # run are appended 

914 

915 record_start, record_end, end_found, _\ 

916 = self._castep_find_last_record(out) 

917 if not end_found: 

918 warnings.warn( 

919 f'No regular end found in {castep_file} file. {self._error}') 

920 if _close: 

921 out.close() 

922 return 

923 # we return here, because the file has no a regular end 

924 

925 # now iterate over last CASTEP output in file to extract information 

926 # could be generalized as well to extract trajectory from file 

927 # holding several outputs 

928 n_cell_const = 0 

929 

930 kpoints = None 

931 

932 out.seek(record_start) 

933 

934 # read header 

935 parameters_header = _read_header(out) 

936 if 'cut_off_energy' in parameters_header: 

937 self._cut_off_energy = parameters_header['cut_off_energy'] 

938 if 'basis_precision' in parameters_header: 

939 del parameters_header['cut_off_energy'] # avoid conflict 

940 for k, v in parameters_header.items(): 

941 setattr(self.param, k, v) 

942 

943 while True: 

944 # TODO: add a switch if we have a geometry optimization: record 

945 # atoms objects for intermediate steps. 

946 try: 

947 line = out.readline() 

948 if not line or out.tell() > record_end: 

949 break 

950 elif 'Number of kpoints used' in line: 

951 kpoints = int(line.split('=')[-1].strip()) 

952 elif 'Unit Cell' in line: 

953 lattice_real = _read_unit_cell(out) 

954 elif 'Cell Contents' in line: 

955 while True: 

956 line = out.readline() 

957 if 'Total number of ions in cell' in line: 

958 n_atoms = int(line.split()[7]) 

959 if 'Total number of species in cell' in line: 

960 int(line.split()[7]) 

961 fields = line.split() 

962 if len(fields) == 0: 

963 break 

964 elif 'Fractional coordinates of atoms' in line: 

965 species, custom_species, positions_frac = \ 

966 _read_fractional_coordinates(out, n_atoms) 

967 elif 'Files used for pseudopotentials' in line: 

968 while True: 

969 line = out.readline() 

970 if 'Pseudopotential generated on-the-fly' in line: 

971 continue 

972 fields = line.split() 

973 if (len(fields) >= 2): 

974 elem, pp_file = fields 

975 self.cell.species_pot = (elem, pp_file) 

976 else: 

977 break 

978 elif 'k-Points For BZ Sampling' in line: 

979 # TODO: generalize for non-Monkhorst Pack case 

980 # (i.e. kpoint lists) - 

981 # kpoints_offset cannot be read this way and 

982 # is hence always set to None 

983 while True: 

984 line = out.readline() 

985 if not line.strip(): 

986 break 

987 if 'MP grid size for SCF calculation' in line: 

988 # kpoints = ' '.join(line.split()[-3:]) 

989 # self.kpoints_mp_grid = kpoints 

990 # self.kpoints_mp_offset = '0. 0. 0.' 

991 # not set here anymore because otherwise 

992 # two calculator objects go out of sync 

993 # after each calculation triggering unnecessary 

994 # recalculation 

995 break 

996 elif 'Number of cell constraints' in line: 

997 n_cell_const = int(line.split()[4]) 

998 

999 elif 'Final energy' in line: 

1000 key = 'energy_without_dispersion_correction' 

1001 self.results[key] = float(line.split()[-2]) 

1002 elif 'Final free energy' in line: 

1003 key = 'free_energy_without_dispersion_correction' 

1004 self.results[key] = float(line.split()[-2]) 

1005 elif 'NB est. 0K energy' in line: 

1006 key = 'energy_zero_without_dispersion_correction' 

1007 self.results[key] = float(line.split()[-2]) 

1008 

1009 # Add support for dispersion correction 

1010 # filtering due to SEDC is done in get_potential_energy 

1011 elif 'Dispersion corrected final energy' in line: 

1012 key = 'energy_with_dispersion_correlation' 

1013 self.results[key] = float(line.split()[-2]) 

1014 elif 'Dispersion corrected final free energy' in line: 

1015 key = 'free_energy_with_dispersion_correlation' 

1016 self.results[key] = float(line.split()[-2]) 

1017 elif 'NB dispersion corrected est. 0K energy' in line: 

1018 key = 'energy_zero_with_dispersion_correlation' 

1019 self.results[key] = float(line.split()[-2]) 

1020 

1021 # check if we had a finite basis set correction 

1022 elif 'Total energy corrected for finite basis set' in line: 

1023 key = 'energy_with_finite_basis_set_correction' 

1024 self.results[key] = float(line.split()[-2]) 

1025 

1026 # ******************** Forces ********************* 

1027 # ************** Symmetrised Forces *************** 

1028 # ******************** Constrained Forces ******************** 

1029 # ******************* Unconstrained Forces ******************* 

1030 elif re.search(r'\**.* Forces \**', line): 

1031 forces, constraints = _read_forces(out, n_atoms) 

1032 self.results['forces'] = np.array(forces) 

1033 

1034 # ***************** Stress Tensor ***************** 

1035 # *********** Symmetrised Stress Tensor *********** 

1036 elif re.search(r'\**.* Stress Tensor \**', line): 

1037 self.results.update(_read_stress(out)) 

1038 

1039 elif ('BFGS: starting iteration' in line 

1040 or 'BFGS: improving iteration' in line): 

1041 if n_cell_const < 6: 

1042 lattice_real = [] 

1043 # backup previous configuration first: 

1044 # for highly symmetric systems (where essentially only the 

1045 # stress is optimized, but the atomic positions) positions 

1046 # are only printed once. 

1047 if species: 

1048 prev_species = deepcopy(species) 

1049 if positions_frac: 

1050 prev_positions_frac = deepcopy(positions_frac) 

1051 species = [] 

1052 positions_frac = [] 

1053 

1054 self.results = {} 

1055 

1056 # extract info from the Mulliken analysis 

1057 elif 'Atomic Populations' in line: 

1058 self.results.update(_read_mulliken_charges(out)) 

1059 

1060 # extract detailed Hirshfeld analysis (iprint > 1) 

1061 elif 'Hirshfeld total electronic charge (e)' in line: 

1062 self.results.update(_read_hirshfeld_details(out, n_atoms)) 

1063 

1064 elif 'Hirshfeld Analysis' in line: 

1065 self.results.update(_read_hirshfeld_charges(out)) 

1066 

1067 # There is actually no good reason to get out of the loop 

1068 # already at this point... or do I miss something? 

1069 # elif 'BFGS: Final Configuration:' in line: 

1070 # break 

1071 elif 'warn' in line.lower(): 

1072 self._warnings.append(line) 

1073 

1074 # fetch some last info 

1075 elif 'Total time' in line: 

1076 pattern = r'.*=\s*([\d\.]+) s' 

1077 self._total_time = float(re.search(pattern, line).group(1)) 

1078 

1079 elif 'Peak Memory Use' in line: 

1080 pattern = r'.*=\s*([\d]+) kB' 

1081 self._peak_memory = int(re.search(pattern, line).group(1)) 

1082 

1083 except Exception as exception: 

1084 sys.stderr.write(line + '|-> line triggered exception: ' 

1085 + str(exception)) 

1086 raise 

1087 

1088 if _close: 

1089 out.close() 

1090 

1091 _set_energy_and_free_energy(self.results) 

1092 

1093 # in highly summetric crystals, positions and symmetry are only printed 

1094 # upon init, hence we here restore these original values 

1095 if not positions_frac: 

1096 positions_frac = prev_positions_frac 

1097 if not species: 

1098 species = prev_species 

1099 

1100 positions_frac_atoms = np.array(positions_frac) 

1101 

1102 if self.atoms and not self._set_atoms: 

1103 # compensate for internal reordering of atoms by CASTEP 

1104 # using the fact that the order is kept within each species 

1105 

1106 indices = _get_indices_to_sort_back(self.atoms.symbols, species) 

1107 positions_frac_atoms = positions_frac_atoms[indices] 

1108 keys = [ 

1109 'forces', 

1110 'charges', 

1111 'magmoms', 

1112 'hirshfeld_volume_ratios', 

1113 'hirshfeld_charges', 

1114 'hirshfeld_magmoms', 

1115 ] 

1116 for k in keys: 

1117 if k not in self.results: 

1118 continue 

1119 self.results[k] = self.results[k][indices] 

1120 

1121 self.atoms.set_scaled_positions(positions_frac_atoms) 

1122 

1123 else: 

1124 # If no atoms, object has been previously defined 

1125 # we define it here and set the Castep() instance as calculator. 

1126 # This covers the case that we simply want to open a .castep file. 

1127 

1128 # The next time around we will have an atoms object, since 

1129 # set_calculator also set atoms in the calculator. 

1130 if self.atoms: 

1131 constraints = self.atoms.constraints 

1132 atoms = Atoms( 

1133 species, 

1134 cell=lattice_real, 

1135 constraint=constraints, 

1136 pbc=True, 

1137 scaled_positions=positions_frac, 

1138 ) 

1139 if custom_species is not None: 

1140 atoms.new_array('castep_custom_species', 

1141 np.array(custom_species)) 

1142 

1143 atoms.set_initial_charges(self.results.get('charges')) 

1144 atoms.set_initial_magnetic_moments(self.results.get('magmoms')) 

1145 

1146 atoms.calc = self 

1147 

1148 self._kpoints = kpoints 

1149 

1150 if self._warnings: 

1151 warnings.warn(f'WARNING: {castep_file} contains warnings') 

1152 for warning in self._warnings: 

1153 warnings.warn(warning) 

1154 # reset 

1155 self._warnings = [] 

1156 

1157 # Read in eigenvalues from bands file 

1158 bands_file = castep_file[:-7] + '.bands' 

1159 if (self.param.task.value is not None 

1160 and self.param.task.value.lower() == 'bandstructure'): 

1161 self._band_structure = self.band_structure(bandfile=bands_file) 

1162 else: 

1163 try: 

1164 (self._ibz_kpts, 

1165 self._ibz_weights, 

1166 self._eigenvalues, 

1167 self._efermi) = read_bands(filename=bands_file) 

1168 except FileNotFoundError: 

1169 warnings.warn('Could not load .bands file, eigenvalues and ' 

1170 'Fermi energy are unknown') 

1171 

1172 # TODO: deprecate once inheriting BaseCalculator 

1173 def get_hirsh_volrat(self): 

1174 """ 

1175 Return the Hirshfeld volume ratios. 

1176 """ 

1177 return self.results.get('hirshfeld_volume_ratios') 

1178 

1179 # TODO: deprecate once inheriting BaseCalculator 

1180 def get_spins(self): 

1181 """ 

1182 Return the spins from a plane-wave Mulliken analysis. 

1183 """ 

1184 return self.results['magmoms'] 

1185 

1186 # TODO: deprecate once inheriting BaseCalculator 

1187 def get_mulliken_charges(self): 

1188 """ 

1189 Return the charges from a plane-wave Mulliken analysis. 

1190 """ 

1191 return self.results['charges'] 

1192 

1193 # TODO: deprecate once inheriting BaseCalculator 

1194 def get_hirshfeld_charges(self): 

1195 """ 

1196 Return the charges from a Hirshfeld analysis. 

1197 """ 

1198 return self.results.get('hirshfeld_charges') 

1199 

1200 def get_total_time(self): 

1201 """ 

1202 Return the total runtime 

1203 """ 

1204 return self._total_time 

1205 

1206 def get_peak_memory(self): 

1207 """ 

1208 Return the peak memory usage 

1209 """ 

1210 return self._peak_memory 

1211 

1212 def set_label(self, label): 

1213 """The label is part of each seed, which in turn is a prefix 

1214 in each CASTEP related file. 

1215 """ 

1216 # we may think about changing this in future to set `self._directory` 

1217 # and `self._label`, as one would expect 

1218 self._label = label 

1219 

1220 def set_pspot(self, pspot, elems=None, 

1221 notelems=None, 

1222 clear=True, 

1223 suffix='usp'): 

1224 """Quickly set all pseudo-potentials: Usually CASTEP psp are named 

1225 like <Elem>_<pspot>.<suffix> so this function function only expects 

1226 the <LibraryName>. It then clears any previous pseudopotential 

1227 settings apply the one with <LibraryName> for each element in the 

1228 atoms object. The optional elems and notelems arguments can be used 

1229 to exclusively assign to some species, or to exclude with notelemens. 

1230 

1231 Parameters :: 

1232 

1233 - elems (None) : set only these elements 

1234 - notelems (None): do not set the elements 

1235 - clear (True): clear previous settings 

1236 - suffix (usp): PP file suffix 

1237 """ 

1238 if self._find_pspots: 

1239 if self._pedantic: 

1240 warnings.warn('Warning: <_find_pspots> = True. ' 

1241 'Do you really want to use `set_pspots()`? ' 

1242 'This does not check whether the PP files exist. ' 

1243 'You may rather want to use `find_pspots()` with ' 

1244 'the same <pspot>.') 

1245 

1246 if clear and not elems and not notelems: 

1247 self.cell.species_pot.clear() 

1248 for elem in set(self.atoms.get_chemical_symbols()): 

1249 if elems is not None and elem not in elems: 

1250 continue 

1251 if notelems is not None and elem in notelems: 

1252 continue 

1253 self.cell.species_pot = (elem, f'{elem}_{pspot}.{suffix}') 

1254 

1255 def find_pspots(self, pspot='.+', elems=None, 

1256 notelems=None, clear=True, suffix='(usp|UPF|recpot)'): 

1257 r"""Quickly find and set all pseudo-potentials by searching in 

1258 castep_pp_path: 

1259 

1260 This one is more flexible than set_pspots, and also checks if the files 

1261 are actually available from the castep_pp_path. 

1262 

1263 Essentially, the function parses the filenames in <castep_pp_path> and 

1264 does a regex matching. The respective pattern is: 

1265 

1266 r"^(<elem>|<elem.upper()>|elem.lower()>(_|-)<pspot>\.<suffix>$" 

1267 

1268 In most cases, it will be sufficient to not specify anything, if you 

1269 use standard CASTEP USPPs with only one file per element in the 

1270 <castep_pp_path>. 

1271 

1272 The function raises a `RuntimeError` if there is some ambiguity 

1273 (multiple files per element). 

1274 

1275 Parameters :: 

1276 

1277 - pspots ('.+') : as defined above, will be a wildcard if not 

1278 specified. 

1279 - elems (None) : set only these elements 

1280 - notelems (None): do not set the elements 

1281 - clear (True): clear previous settings 

1282 - suffix (usp|UPF|recpot): PP file suffix 

1283 """ 

1284 if clear and not elems and not notelems: 

1285 self.cell.species_pot.clear() 

1286 

1287 if not os.path.isdir(self._castep_pp_path): 

1288 if self._pedantic: 

1289 warnings.warn( 

1290 'Cannot search directory: {} Folder does not exist' 

1291 .format(self._castep_pp_path)) 

1292 return 

1293 

1294 # translate the bash wildcard syntax to regex 

1295 if pspot == '*': 

1296 pspot = '.*' 

1297 if suffix == '*': 

1298 suffix = '.*' 

1299 if pspot == '*': 

1300 pspot = '.*' 

1301 

1302 # GBRV USPPs have a strnage naming schme 

1303 pattern = r'^({elem}|{elem_upper}|{elem_lower})(_|-){pspot}\.{suffix}$' 

1304 

1305 for elem in set(self.atoms.get_chemical_symbols()): 

1306 if elems is not None and elem not in elems: 

1307 continue 

1308 if notelems is not None and elem in notelems: 

1309 continue 

1310 p = pattern.format(elem=elem, 

1311 elem_upper=elem.upper(), 

1312 elem_lower=elem.lower(), 

1313 pspot=pspot, 

1314 suffix=suffix) 

1315 pps = [] 

1316 for f in os.listdir(self._castep_pp_path): 

1317 if re.match(p, f): 

1318 pps.append(f) 

1319 if not pps: 

1320 if self._pedantic: 

1321 warnings.warn('Pseudopotential for species {} not found!' 

1322 .format(elem)) 

1323 elif len(pps) != 1: 

1324 raise RuntimeError( 

1325 'Pseudopotential for species ''{} not unique!\n' 

1326 .format(elem) 

1327 + 'Found the following files in {}\n' 

1328 .format(self._castep_pp_path) 

1329 + '\n'.join([f' {pp}' for pp in pps]) + 

1330 '\nConsider a stricter search pattern in `find_pspots()`.') 

1331 else: 

1332 self.cell.species_pot = (elem, pps[0]) 

1333 

1334 @_self_getter 

1335 def get_total_energy(self, atoms): 

1336 """Run CASTEP calculation if needed and return total energy.""" 

1337 self.update(atoms) 

1338 return self.results.get('energy_without_dispersion_correction') 

1339 

1340 @_self_getter 

1341 def get_total_energy_corrected(self, atoms): 

1342 """Run CASTEP calculation if needed and return total energy.""" 

1343 self.update(atoms) 

1344 return self.results.get('energy_with_finite_basis_set_correction') 

1345 

1346 @_self_getter 

1347 def get_free_energy(self, atoms): 

1348 """Run CASTEP calculation if needed and return free energy. 

1349 Only defined with smearing.""" 

1350 self.update(atoms) 

1351 return self.results.get('free_energy_without_dispersion_correction') 

1352 

1353 @_self_getter 

1354 def get_0K_energy(self, atoms): 

1355 """Run CASTEP calculation if needed and return 0K energy. 

1356 Only defined with smearing.""" 

1357 self.update(atoms) 

1358 return self.results.get('energy_zero_without_dispersion_correction') 

1359 

1360 @_self_getter 

1361 def get_pressure(self, atoms): 

1362 """Return the pressure.""" 

1363 self.update(atoms) 

1364 return self.results.get('pressure') 

1365 

1366 @_self_getter 

1367 def get_unit_cell(self, atoms): 

1368 """Return the unit cell.""" 

1369 self.update(atoms) 

1370 return self._unit_cell 

1371 

1372 @_self_getter 

1373 def get_kpoints(self, atoms): 

1374 """Return the kpoints.""" 

1375 self.update(atoms) 

1376 return self._kpoints 

1377 

1378 @_self_getter 

1379 def get_number_cell_constraints(self, atoms): 

1380 """Return the number of cell constraints.""" 

1381 self.update(atoms) 

1382 return self._number_of_cell_constraints 

1383 

1384 def update(self, atoms): 

1385 """Checks if atoms object or calculator changed and 

1386 runs calculation if so. 

1387 """ 

1388 if self.calculation_required(atoms, None): 

1389 self.calculate(atoms, [], []) 

1390 

1391 def calculation_required(self, atoms, properties): 

1392 """Checks wether anything changed in the atoms object or CASTEP 

1393 settings since the last calculation using this instance. 

1394 """ 

1395 # SPR: what happens with the atoms parameter here? Why don't we use it? 

1396 # from all that I can tell we need to compare against atoms instead of 

1397 # self.atoms 

1398 # if not self.atoms == self._old_atoms: 

1399 if atoms != self._old_atoms: 

1400 return True 

1401 if self._old_param is None or self._old_cell is None: 

1402 return True 

1403 if self.param._options != self._old_param._options: 

1404 return True 

1405 if self.cell._options != self._old_cell._options: 

1406 return True 

1407 return False 

1408 

1409 def calculate(self, atoms, properties, system_changes): 

1410 """Write all necessary input file and call CASTEP.""" 

1411 self.prepare_input_files(atoms, force_write=self._force_write) 

1412 if not self._prepare_input_only: 

1413 self.run() 

1414 self.read() 

1415 

1416 # we need to push the old state here! 

1417 # although run() pushes it, read() may change the atoms object 

1418 # again. 

1419 # yet, the old state is supposed to be the one AFTER read() 

1420 self.push_oldstate() 

1421 

1422 def push_oldstate(self): 

1423 """This function pushes the current state of the (CASTEP) Atoms object 

1424 onto the previous state. Or in other words after calling this function, 

1425 calculation_required will return False and enquiry functions just 

1426 report the current value, e.g. get_forces(), get_potential_energy(). 

1427 """ 

1428 # make a snapshot of all current input 

1429 # to be able to test if recalculation 

1430 # is necessary 

1431 self._old_atoms = self.atoms.copy() 

1432 self._old_param = deepcopy(self.param) 

1433 self._old_cell = deepcopy(self.cell) 

1434 

1435 def initialize(self, *args, **kwargs): 

1436 """Just an alias for prepar_input_files to comply with standard 

1437 function names in ASE. 

1438 """ 

1439 self.prepare_input_files(*args, **kwargs) 

1440 

1441 def prepare_input_files(self, atoms=None, force_write=None): 

1442 """Only writes the input .cell and .param files and return 

1443 This can be useful if one quickly needs to prepare input files 

1444 for a cluster where no python or ASE is available. One can than 

1445 upload the file manually and read out the results using 

1446 Castep().read(). 

1447 """ 

1448 

1449 if self.param.reuse.value is None: 

1450 if self._pedantic: 

1451 warnings.warn( 

1452 'You have not set e.g. calc.param.reuse = True. ' 

1453 'Reusing a previous calculation may save CPU time! ' 

1454 'The interface will make sure by default, .check exists. ' 

1455 'file before adding this statement to the .param file.') 

1456 if self.param.num_dump_cycles.value is None: 

1457 if self._pedantic: 

1458 warnings.warn( 

1459 'You have not set e.g. calc.param.num_dump_cycles = 0. ' 

1460 'This can save you a lot of disk space. One only needs ' 

1461 '*wvfn* if electronic convergence is not achieved.') 

1462 from ase.io.castep import write_param 

1463 

1464 if atoms is None: 

1465 atoms = self.atoms 

1466 else: 

1467 self.atoms = atoms 

1468 

1469 if force_write is None: 

1470 force_write = self._force_write 

1471 

1472 # if we have new instance of the calculator, 

1473 # move existing results out of the way, first 

1474 if (os.path.isdir(self._directory) 

1475 and self._calls == 0 

1476 and self._rename_existing_dir): 

1477 if os.listdir(self._directory) == []: 

1478 os.rmdir(self._directory) 

1479 else: 

1480 # rename appending creation date of the directory 

1481 ctime = time.localtime(os.lstat(self._directory).st_ctime) 

1482 os.rename(self._directory, '%s.bak-%s' % 

1483 (self._directory, 

1484 time.strftime('%Y%m%d-%H%M%S', ctime))) 

1485 

1486 # create work directory 

1487 if not os.path.isdir(self._directory): 

1488 os.makedirs(self._directory, 0o775) 

1489 

1490 # we do this every time, not only upon first call 

1491 # if self._calls == 0: 

1492 self._fetch_pspots() 

1493 

1494 # if _try_reuse is requested and this 

1495 # is not the first run, we try to find 

1496 # the .check file from the previous run 

1497 # this is only necessary if _track_output 

1498 # is set to true 

1499 if self._try_reuse and self._calls > 0: 

1500 if os.path.exists(self._abs_path(self._check_file)): 

1501 self.param.reuse = self._check_file 

1502 elif os.path.exists(self._abs_path(self._castep_bin_file)): 

1503 self.param.reuse = self._castep_bin_file 

1504 self._seed = self._build_castep_seed() 

1505 self._check_file = f'{self._seed}.check' 

1506 self._castep_bin_file = f'{self._seed}.castep_bin' 

1507 self._castep_file = self._abs_path(f'{self._seed}.castep') 

1508 

1509 # write out the input file 

1510 self._write_cell(self._abs_path(f'{self._seed}.cell'), 

1511 self.atoms, castep_cell=self.cell, 

1512 force_write=force_write) 

1513 

1514 if self._export_settings: 

1515 interface_options = self._opt 

1516 else: 

1517 interface_options = None 

1518 write_param(self._abs_path(f'{self._seed}.param'), self.param, 

1519 check_checkfile=self._check_checkfile, 

1520 force_write=force_write, 

1521 interface_options=interface_options,) 

1522 

1523 def _build_castep_seed(self): 

1524 """Abstracts to construction of the final castep <seed> 

1525 with and without _tracking_output. 

1526 """ 

1527 if self._track_output: 

1528 return '%s-%06d' % (self._label, self._calls) 

1529 else: 

1530 return f'{(self._label)}' 

1531 

1532 def _abs_path(self, path): 

1533 # Create an absolute path for a file to put in the working directory 

1534 return os.path.join(self._directory, path) 

1535 

1536 def run(self): 

1537 """Simply call castep. If the first .err file 

1538 contains text, this will be printed to the screen. 

1539 """ 

1540 # change to target directory 

1541 self._calls += 1 

1542 

1543 # run castep itself 

1544 stdout, stderr = shell_stdouterr('{} {}'.format(self._castep_command, 

1545 self._seed), 

1546 cwd=self._directory) 

1547 if stdout: 

1548 print(f'castep call stdout:\n{stdout}') 

1549 if stderr: 

1550 print(f'castep call stderr:\n{stderr}') 

1551 

1552 # shouldn't it be called after read()??? 

1553 # self.push_oldstate() 

1554 

1555 # check for non-empty error files 

1556 err_file = self._abs_path(f'{self._seed}.0001.err') 

1557 if os.path.exists(err_file): 

1558 with open(err_file) as err_file: 

1559 self._error = err_file.read() 

1560 if self._error: 

1561 raise RuntimeError(self._error) 

1562 

1563 def __repr__(self): 

1564 """Returns generic, fast to capture representation of 

1565 CASTEP settings along with atoms object. 

1566 """ 

1567 expr = '' 

1568 expr += '-----------------Atoms--------------------\n' 

1569 if self.atoms is not None: 

1570 expr += str('%20s\n' % self.atoms) 

1571 else: 

1572 expr += 'None\n' 

1573 

1574 expr += '-----------------Param keywords-----------\n' 

1575 expr += str(self.param) 

1576 expr += '-----------------Cell keywords------------\n' 

1577 expr += str(self.cell) 

1578 expr += '-----------------Internal keys------------\n' 

1579 for key in self.internal_keys: 

1580 expr += '%20s : %s\n' % (key, self._opt[key]) 

1581 

1582 return expr 

1583 

1584 def __getattr__(self, attr): 

1585 """___getattr___ gets overloaded to reroute the internal keys 

1586 and to be able to easily store them in in the param so that 

1587 they can be read in again in subsequent calls. 

1588 """ 

1589 if attr in self.internal_keys: 

1590 return self._opt[attr] 

1591 if attr in ['__repr__', '__str__']: 

1592 raise AttributeError 

1593 elif attr not in self.__dict__: 

1594 raise AttributeError(f'Attribute {attr} not found') 

1595 else: 

1596 return self.__dict__[attr] 

1597 

1598 def __setattr__(self, attr, value): 

1599 """We overload the settattr method to make value assignment 

1600 as pythonic as possible. Internal values all start with _. 

1601 Value assigment is case insensitive! 

1602 """ 

1603 

1604 if attr.startswith('_'): 

1605 # internal variables all start with _ 

1606 # let's check first if they are close but not identical 

1607 # to one of the switches, that the user accesses directly 

1608 similars = difflib.get_close_matches(attr, self.internal_keys, 

1609 cutoff=0.9) 

1610 if attr not in self.internal_keys and similars: 

1611 warnings.warn( 

1612 'Warning: You probably tried one of: ' 

1613 f'{similars} but typed {attr}') 

1614 if attr in self.internal_keys: 

1615 self._opt[attr] = value 

1616 if attr == '_track_output': 

1617 if value: 

1618 self._try_reuse = True 

1619 if self._pedantic: 

1620 warnings.warn( 

1621 'You switched _track_output on. This will ' 

1622 'consume a lot of disk-space. The interface ' 

1623 'also switched _try_reuse on, which will ' 

1624 'try to find the last check file. Set ' 

1625 '_try_reuse = False, if you need ' 

1626 'really separate calculations') 

1627 elif '_try_reuse' in self._opt and self._try_reuse: 

1628 self._try_reuse = False 

1629 if self._pedantic: 

1630 warnings.warn('_try_reuse is set to False, too') 

1631 else: 

1632 self.__dict__[attr] = value 

1633 return 

1634 elif attr in ['atoms', 'cell', 'param', 'results']: 

1635 if value is not None: 

1636 if attr == 'atoms' and not isinstance(value, Atoms): 

1637 raise TypeError( 

1638 f'{value} is not an instance of Atoms.') 

1639 elif attr == 'cell' and not isinstance(value, CastepCell): 

1640 raise TypeError( 

1641 f'{value} is not an instance of CastepCell.') 

1642 elif attr == 'param' and not isinstance(value, CastepParam): 

1643 raise TypeError( 

1644 f'{value} is not an instance of CastepParam.') 

1645 # These 3 are accepted right-away, no matter what 

1646 self.__dict__[attr] = value 

1647 return 

1648 elif attr in self.atoms_obj_keys: 

1649 # keywords which clearly belong to the atoms object are 

1650 # rerouted to go there 

1651 self.atoms.__dict__[attr] = value 

1652 return 

1653 elif attr in self.atoms_keys: 

1654 # CASTEP keywords that should go into the atoms object 

1655 # itself are blocked 

1656 warnings.warn('Ignoring setings of "%s", since this has to be set ' 

1657 'through the atoms object' % attr) 

1658 return 

1659 

1660 attr = attr.lower() 

1661 if attr not in (list(self.cell._options.keys()) 

1662 + list(self.param._options.keys())): 

1663 # what is left now should be meant to be a castep keyword 

1664 # so we first check if it defined, and if not offer some error 

1665 # correction 

1666 if self._kw_tol == 0: 

1667 similars = difflib.get_close_matches( 

1668 attr, 

1669 self.cell._options.keys() + self.param._options.keys()) 

1670 if similars: 

1671 raise RuntimeError( 

1672 f'Option "{attr}" not known! You mean "{similars[0]}"?') 

1673 else: 

1674 raise RuntimeError(f'Option "{attr}" is not known!') 

1675 else: 

1676 warnings.warn('Option "%s" is not known - please set any new' 

1677 ' options directly in the .cell or .param ' 

1678 'objects' % attr) 

1679 return 

1680 

1681 # here we know it must go into one of the component param or cell 

1682 # so we first determine which one 

1683 if attr in self.param._options.keys(): 

1684 comp = 'param' 

1685 elif attr in self.cell._options.keys(): 

1686 comp = 'cell' 

1687 else: 

1688 raise RuntimeError('Programming error: could not attach ' 

1689 'the keyword to an input file') 

1690 

1691 self.__dict__[comp].__setattr__(attr, value) 

1692 

1693 def merge_param(self, param, overwrite=True, ignore_internal_keys=False): 

1694 """Parse a param file and merge it into the current parameters.""" 

1695 if isinstance(param, CastepParam): 

1696 for key, option in param._options.items(): 

1697 if option.value is not None: 

1698 self.param.__setattr__(key, option.value) 

1699 return 

1700 

1701 elif isinstance(param, str): 

1702 param_file = open(param) 

1703 _close = True 

1704 

1705 else: 

1706 # in this case we assume that we have a fileobj already, but check 

1707 # for attributes in order to avoid extended EAFP blocks. 

1708 param_file = param 

1709 

1710 # look before you leap... 

1711 attributes = ['name', 

1712 'close' 

1713 'readlines'] 

1714 

1715 for attr in attributes: 

1716 if not hasattr(param_file, attr): 

1717 raise TypeError('"param" is neither CastepParam nor str ' 

1718 'nor valid fileobj') 

1719 

1720 param = param_file.name 

1721 _close = False 

1722 

1723 self, int_opts = read_param(fd=param_file, calc=self, 

1724 get_interface_options=True) 

1725 

1726 # Add the interface options 

1727 for k, val in int_opts.items(): 

1728 if (k in self.internal_keys and not ignore_internal_keys): 

1729 if val in _tf_table: 

1730 val = _tf_table[val] 

1731 self._opt[k] = val 

1732 

1733 if _close: 

1734 param_file.close() 

1735 

1736 def dryrun_ok(self, dryrun_flag='-dryrun'): 

1737 """Starts a CASTEP run with the -dryrun flag [default] 

1738 in a temporary and check wether all variables are initialized 

1739 correctly. This is recommended for every bigger simulation. 

1740 """ 

1741 from ase.io.castep import write_param 

1742 

1743 temp_dir = tempfile.mkdtemp() 

1744 self._fetch_pspots(temp_dir) 

1745 seed = 'dryrun' 

1746 

1747 self._write_cell(os.path.join(temp_dir, f'{seed}.cell'), 

1748 self.atoms, castep_cell=self.cell) 

1749 # This part needs to be modified now that we rely on the new formats.py 

1750 # interface 

1751 if not os.path.isfile(os.path.join(temp_dir, f'{seed}.cell')): 

1752 warnings.warn(f'{seed}.cell not written - aborting dryrun') 

1753 return None 

1754 write_param(os.path.join(temp_dir, f'{seed}.param'), self.param, ) 

1755 

1756 stdout, stderr = shell_stdouterr(('{} {} {}'.format( 

1757 self._castep_command, 

1758 seed, 

1759 dryrun_flag)), 

1760 cwd=temp_dir) 

1761 

1762 if stdout: 

1763 print(stdout) 

1764 if stderr: 

1765 print(stderr) 

1766 with open(os.path.join(temp_dir, f'{seed}.castep')) as result_file: 

1767 txt = result_file.read() 

1768 ok_string = (r'.*DRYRUN finished.*No problems found with input ' 

1769 r'files.*') 

1770 match = re.match(ok_string, txt, re.DOTALL) 

1771 

1772 m = re.search(r'Number of kpoints used =\s*([0-9]+)', txt) 

1773 if m: 

1774 self._kpoints = int(m.group(1)) 

1775 else: 

1776 warnings.warn( 

1777 'Couldn\'t fetch number of kpoints from dryrun CASTEP file') 

1778 

1779 err_file = os.path.join(temp_dir, f'{seed}.0001.err') 

1780 if match is None and os.path.exists(err_file): 

1781 with open(err_file) as err_file: 

1782 self._error = err_file.read() 

1783 shutil.rmtree(temp_dir) 

1784 

1785 # re.match return None is the string does not match 

1786 return match is not None 

1787 

1788 def _fetch_pspots(self, directory=None): 

1789 """Put all specified pseudo-potentials into the working directory. 

1790 """ 

1791 # should be a '==' right? Otherwise setting _castep_pp_path is not 

1792 # honored. 

1793 if (not cfg.get('PSPOT_DIR', None) 

1794 and self._castep_pp_path == os.path.abspath('.')): 

1795 # By default CASTEP consults the environment variable 

1796 # PSPOT_DIR. If this contains a list of colon separated 

1797 # directories it will check those directories for pseudo- 

1798 # potential files if not in the current directory. 

1799 # Thus if PSPOT_DIR is set there is nothing left to do. 

1800 # If however PSPOT_DIR was been accidentally set 

1801 # (e.g. with regards to a different program) 

1802 # setting CASTEP_PP_PATH to an explicit value will 

1803 # still be honored. 

1804 return 

1805 

1806 if directory is None: 

1807 directory = self._directory 

1808 if not os.path.isdir(self._castep_pp_path): 

1809 warnings.warn(f'PSPs directory {self._castep_pp_path} not found') 

1810 pspots = {} 

1811 if self._find_pspots: 

1812 self.find_pspots() 

1813 if self.cell.species_pot.value is not None: 

1814 for line in self.cell.species_pot.value.split('\n'): 

1815 line = line.split() 

1816 if line: 

1817 pspots[line[0]] = line[1] 

1818 for species in self.atoms.get_chemical_symbols(): 

1819 if not pspots or species not in pspots.keys(): 

1820 if self._build_missing_pspots: 

1821 if self._pedantic: 

1822 warnings.warn( 

1823 'Warning: you have no PP specified for %s. ' 

1824 'CASTEP will now generate an on-the-fly ' 

1825 'potentials. ' 

1826 'For sake of numerical consistency and efficiency ' 

1827 'this is discouraged.' % species) 

1828 else: 

1829 raise RuntimeError( 

1830 f'Warning: you have no PP specified for {species}.') 

1831 if self.cell.species_pot.value: 

1832 for (species, pspot) in pspots.items(): 

1833 orig_pspot_file = os.path.join(self._castep_pp_path, pspot) 

1834 cp_pspot_file = os.path.join(directory, pspot) 

1835 if (os.path.exists(orig_pspot_file) 

1836 and not os.path.exists(cp_pspot_file)): 

1837 if self._copy_pspots: 

1838 shutil.copy(orig_pspot_file, directory) 

1839 elif self._link_pspots: 

1840 os.symlink(orig_pspot_file, cp_pspot_file) 

1841 else: 

1842 if self._pedantic: 

1843 warnings.warn(ppwarning) 

1844 

1845 

1846ppwarning = ('Warning: PP files have neither been ' 

1847 'linked nor copied to the working directory. Make ' 

1848 'sure to set the evironment variable PSPOT_DIR ' 

1849 'accordingly!') 

1850 

1851 

1852def _read_header(out: io.TextIOBase): 

1853 """Read the header blocks from a .castep file. 

1854 

1855 Returns 

1856 ------- 

1857 parameters : dict 

1858 Dictionary storing keys and values of a .param file. 

1859 """ 

1860 def _parse_on_off(_: str): 

1861 return {'on': True, 'off': False}[_] 

1862 

1863 read_title = False 

1864 parameters: Dict[str, Any] = {} 

1865 while True: 

1866 line = out.readline() 

1867 if len(line) == 0: # end of file 

1868 break 

1869 if re.search(r'^\s*\*+$', line) and read_title: # end of header 

1870 break 

1871 

1872 if re.search(r'\**.* Title \**', line): 

1873 read_title = True 

1874 

1875 # General Parameters 

1876 

1877 elif 'output verbosity' in line: 

1878 parameters['iprint'] = int(line.split()[-1][1]) 

1879 elif re.match(r'\stype of calculation\s*:', line): 

1880 parameters['task'] = { 

1881 'single point energy': 'SinglePoint', 

1882 'geometry optimization': 'GeometryOptimization', 

1883 'band structure': 'BandStructure', 

1884 'molecular dynamics': 'MolecularDynamics', 

1885 'optical properties': 'Optics', 

1886 'phonon calculation': 'Phonon', 

1887 'E-field calculation': 'Efield', 

1888 'Phonon followed by E-field': 'Phonon+Efield', 

1889 'transition state search': 'TransitionStateSearch', 

1890 'Magnetic Resonance': 'MagRes', 

1891 'Core level spectra': 'Elnes', 

1892 'Electronic Spectroscopy': 'ElectronicSpectroscopy', 

1893 }[line.split(':')[-1].strip()] 

1894 elif 'stress calculation' in line: 

1895 parameters['calculate_stress'] = _parse_on_off(line.split()[-1]) 

1896 elif 'calculation limited to maximum' in line: 

1897 parameters['run_time'] = float(line.split()[-2]) 

1898 elif re.match(r'\soptimization strategy\s*:', line): 

1899 parameters['opt_strategy'] = { 

1900 'maximize speed(+++)': 'Speed', 

1901 'minimize memory(---)': 'Memory', 

1902 'balance speed and memory': 'Default', 

1903 }[line.split(':')[-1].strip()] 

1904 

1905 # Exchange-Correlation Parameters 

1906 

1907 elif re.match(r'\susing functional\s*:', line): 

1908 parameters['xc_functional'] = { 

1909 'Local Density Approximation': 'LDA', 

1910 'Perdew Wang (1991)': 'PW91', 

1911 'Perdew Burke Ernzerhof': 'PBE', 

1912 'revised Perdew Burke Ernzerhof': 'RPBE', 

1913 'PBE with Wu-Cohen exchange': 'WC', 

1914 'PBE for solids (2008)': 'PBESOL', 

1915 'Hartree-Fock': 'HF', 

1916 'Hartree-Fock +': 'HF-LDA', 

1917 'Screened Hartree-Fock': 'sX', 

1918 'Screened Hartree-Fock + ': 'sX-LDA', 

1919 'hybrid PBE0': 'PBE0', 

1920 'hybrid B3LYP': 'B3LYP', 

1921 'hybrid HSE03': 'HSE03', 

1922 'hybrid HSE06': 'HSE06', 

1923 'RSCAN': 'RSCAN', 

1924 }[line.split(':')[-1].strip()] 

1925 elif 'DFT+D: Semi-empirical dispersion correction' in line: 

1926 parameters['sedc_apply'] = _parse_on_off(line.split()[-1]) 

1927 elif 'SEDC with' in line: 

1928 parameters['sedc_scheme'] = { 

1929 'OBS correction scheme': 'OBS', 

1930 'G06 correction scheme': 'G06', 

1931 'D3 correction scheme': 'D3', 

1932 'D3(BJ) correction scheme': 'D3-BJ', 

1933 'D4 correction scheme': 'D4', 

1934 'JCHS correction scheme': 'JCHS', 

1935 'TS correction scheme': 'TS', 

1936 'TSsurf correction scheme': 'TSSURF', 

1937 'TS+SCS correction scheme': 'TSSCS', 

1938 'aperiodic TS+SCS correction scheme': 'ATSSCS', 

1939 'aperiodic MBD@SCS method': 'AMBD', 

1940 'MBD@SCS method': 'MBD', 

1941 'aperiodic MBD@rsSCS method': 'AMBD*', 

1942 'MBD@rsSCS method': 'MBD*', 

1943 'XDM correction scheme': 'XDM', 

1944 }[line.split(':')[-1].strip()] 

1945 

1946 # Basis Set Parameters 

1947 

1948 elif 'basis set accuracy' in line: 

1949 parameters['basis_precision'] = line.split()[-1] 

1950 elif 'plane wave basis set cut-off' in line: 

1951 parameters['cut_off_energy'] = float(line.split()[-2]) 

1952 elif re.match(r'\sfinite basis set correction\s*:', line): 

1953 parameters['finite_basis_corr'] = { 

1954 'none': 0, 

1955 'manual': 1, 

1956 'automatic': 2, 

1957 }[line.split()[-1]] 

1958 

1959 # Electronic Parameters 

1960 

1961 elif 'treating system as spin-polarized' in line: 

1962 parameters['spin_polarized'] = True 

1963 

1964 # Electronic Minimization Parameters 

1965 

1966 elif 'Treating system as non-metallic' in line: 

1967 parameters['fix_occupancy'] = True 

1968 elif 'total energy / atom convergence tol.' in line: 

1969 parameters['elec_energy_tol'] = float(line.split()[-2]) 

1970 elif 'convergence tolerance window' in line: 

1971 parameters['elec_convergence_win'] = int(line.split()[-2]) 

1972 elif 'max. number of SCF cycles:' in line: 

1973 parameters['max_scf_cycles'] = float(line.split()[-1]) 

1974 elif 'dump wavefunctions every' in line: 

1975 parameters['num_dump_cycles'] = float(line.split()[-3]) 

1976 

1977 # Density Mixing Parameters 

1978 

1979 elif 'density-mixing scheme' in line: 

1980 parameters['mixing_scheme'] = line.split()[-1] 

1981 

1982 return parameters 

1983 

1984 

1985def _read_unit_cell(out: io.TextIOBase): 

1986 """Read a Unit Cell block from a .castep file.""" 

1987 while True: 

1988 line = out.readline() 

1989 fields = line.split() 

1990 if len(fields) == 6: 

1991 break 

1992 lattice_real = [] 

1993 for _ in range(3): 

1994 lattice_real.append([float(f) for f in fields[0:3]]) 

1995 line = out.readline() 

1996 fields = line.split() 

1997 return np.array(lattice_real) 

1998 

1999 

2000def _read_forces(out: io.TextIOBase, n_atoms: int): 

2001 """Read a block for atomic forces from a .castep file.""" 

2002 constraints: List[FixConstraint] = [] 

2003 forces = [] 

2004 while True: 

2005 line = out.readline() 

2006 fields = line.split() 

2007 if len(fields) == 7: 

2008 break 

2009 for n in range(n_atoms): 

2010 consd = np.array([0, 0, 0]) 

2011 fxyz = [0.0, 0.0, 0.0] 

2012 for i, force_component in enumerate(fields[-4:-1]): 

2013 if force_component.count("(cons'd)") > 0: 

2014 consd[i] = 1 

2015 # remove constraint labels in force components 

2016 fxyz[i] = float(force_component.replace("(cons'd)", '')) 

2017 if consd.all(): 

2018 constraints.append(FixAtoms(n)) 

2019 elif consd.any(): 

2020 constraints.append(FixCartesian(n, consd)) 

2021 forces.append(fxyz) 

2022 line = out.readline() 

2023 fields = line.split() 

2024 return forces, constraints 

2025 

2026 

2027def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int): 

2028 """Read fractional coordinates from a .castep file.""" 

2029 species: List[str] = [] 

2030 custom_species: Optional[List[str]] = None # A CASTEP special thing 

2031 positions_frac: List[List[float]] = [] 

2032 while True: 

2033 line = out.readline() 

2034 fields = line.split() 

2035 if len(fields) == 7: 

2036 break 

2037 for _ in range(n_atoms): 

2038 spec_custom = fields[1].split(':', 1) 

2039 elem = spec_custom[0] 

2040 if len(spec_custom) > 1 and custom_species is None: 

2041 # Add it to the custom info! 

2042 custom_species = list(species) 

2043 species.append(elem) 

2044 if custom_species is not None: 

2045 custom_species.append(fields[1]) 

2046 positions_frac.append([float(s) for s in fields[3:6]]) 

2047 line = out.readline() 

2048 fields = line.split() 

2049 return species, custom_species, positions_frac 

2050 

2051 

2052def _read_stress(out: io.TextIOBase): 

2053 """Read a block for the stress tensor from a .castep file.""" 

2054 while True: 

2055 line = out.readline() 

2056 fields = line.split() 

2057 if len(fields) == 6: 

2058 break 

2059 results = {} 

2060 stress = [] 

2061 for _ in range(3): 

2062 stress.append([float(s) for s in fields[2:5]]) 

2063 line = out.readline() 

2064 fields = line.split() 

2065 # stress in .castep file is given in GPa 

2066 results['stress'] = np.array(stress) * units.GPa 

2067 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]] 

2068 line = out.readline() 

2069 if "Pressure:" in line: 

2070 results['pressure'] = float(line.split()[-2]) * units.GPa 

2071 return results 

2072 

2073 

2074def _read_mulliken_charges(out: io.TextIOBase): 

2075 """Read a block for Mulliken charges from a .castep file.""" 

2076 for i in range(3): 

2077 line = out.readline() 

2078 if i == 1: 

2079 spin_polarized = 'Spin' in line 

2080 results = defaultdict(list) 

2081 while True: 

2082 line = out.readline() 

2083 fields = line.split() 

2084 if len(fields) == 1: 

2085 break 

2086 if spin_polarized: 

2087 if len(fields) != 7: # due to CASTEP 18 outformat changes 

2088 results['charges'].append(float(fields[-2])) 

2089 results['magmoms'].append(float(fields[-1])) 

2090 else: 

2091 results['charges'].append(float(fields[-1])) 

2092 return {k: np.array(v) for k, v in results.items()} 

2093 

2094 

2095def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int): 

2096 """Read the Hirshfeld analysis when iprint > 1 from a .castep file.""" 

2097 results = defaultdict(list) 

2098 for _ in range(n_atoms): 

2099 while True: 

2100 line = out.readline() 

2101 if line.strip() == '': 

2102 break # end for each atom 

2103 if 'Hirshfeld / free atomic volume :' in line: 

2104 line = out.readline() 

2105 fields = line.split() 

2106 results['hirshfeld_volume_ratios'].append(float(fields[0])) 

2107 return {k: np.array(v) for k, v in results.items()} 

2108 

2109 

2110def _read_hirshfeld_charges(out: io.TextIOBase): 

2111 """Read a block for Hirshfeld charges from a .castep file.""" 

2112 for i in range(3): 

2113 line = out.readline() 

2114 if i == 1: 

2115 spin_polarized = 'Spin' in line 

2116 results = defaultdict(list) 

2117 while True: 

2118 line = out.readline() 

2119 fields = line.split() 

2120 if len(fields) == 1: 

2121 break 

2122 if spin_polarized: 

2123 results['hirshfeld_charges'].append(float(fields[-2])) 

2124 results['hirshfeld_magmoms'].append(float(fields[-1])) 

2125 else: 

2126 results['hirshfeld_charges'].append(float(fields[-1])) 

2127 return {k: np.array(v) for k, v in results.items()} 

2128 

2129 

2130def _get_indices_to_sort_back(symbols, species): 

2131 """Get indices to sort spicies in .castep back to atoms.symbols.""" 

2132 uniques = np.unique(symbols) 

2133 indices = np.full(len(symbols), -1, dtype=int) 

2134 for unique in uniques: 

2135 where_symbols = [i for i, s in enumerate(symbols) if s == unique] 

2136 where_species = [j for j, s in enumerate(species) if s == unique] 

2137 for i, j in zip(where_symbols, where_species): 

2138 indices[i] = j 

2139 if -1 in indices: 

2140 not_assigned = [_ for _ in indices if _ == -1] 

2141 raise RuntimeError(f'Atoms {not_assigned} where not assigned.') 

2142 return indices 

2143 

2144 

2145def _set_energy_and_free_energy(results: Dict[str, Any]): 

2146 """Set values referred to as `energy` and `free_energy`.""" 

2147 if 'energy_with_dispersion_correction' in results: 

2148 suffix = '_with_dispersion_correction' 

2149 else: 

2150 suffix = '_without_dispersion_correction' 

2151 

2152 if 'free_energy' + suffix in results: # metallic 

2153 keye = 'energy_zero' + suffix 

2154 keyf = 'free_energy' + suffix 

2155 else: # non-metallic 

2156 # The finite basis set correction is applied to the energy at finite T 

2157 # (not the energy at 0 K). We should hence refer to the corrected value 

2158 # as `energy` only when the free energy is unavailable, i.e., only when 

2159 # FIX_OCCUPANCY : TRUE and thus no smearing is applied. 

2160 if 'energy_with_finite_basis_set_correction' in results: 

2161 keye = 'energy_with_finite_basis_set_correction' 

2162 else: 

2163 keye = 'energy' + suffix 

2164 keyf = 'energy' + suffix 

2165 

2166 results['energy'] = results[keye] 

2167 results['free_energy'] = results[keyf] 

2168 

2169 

2170def get_castep_version(castep_command): 

2171 """This returns the version number as printed in the CASTEP banner. 

2172 For newer CASTEP versions ( > 6.1) the --version command line option 

2173 has been added; this will be attempted first. 

2174 """ 

2175 import tempfile 

2176 with tempfile.TemporaryDirectory() as temp_dir: 

2177 return _get_castep_version(castep_command, temp_dir) 

2178 

2179 

2180def _get_castep_version(castep_command, temp_dir): 

2181 jname = 'dummy_jobname' 

2182 stdout, stderr = '', '' 

2183 fallback_version = 16. # CASTEP 16.0 and 16.1 report version wrongly 

2184 try: 

2185 stdout, stderr = subprocess.Popen( 

2186 castep_command.split() + ['--version'], 

2187 stderr=subprocess.PIPE, 

2188 stdout=subprocess.PIPE, cwd=temp_dir, 

2189 universal_newlines=True).communicate() 

2190 if 'CASTEP version' not in stdout: 

2191 stdout, stderr = subprocess.Popen( 

2192 castep_command.split() + [jname], 

2193 stderr=subprocess.PIPE, 

2194 stdout=subprocess.PIPE, cwd=temp_dir, 

2195 universal_newlines=True).communicate() 

2196 except Exception: # XXX Which kind of exception? 

2197 msg = '' 

2198 msg += 'Could not determine the version of your CASTEP binary \n' 

2199 msg += 'This usually means one of the following \n' 

2200 msg += ' * you do not have CASTEP installed \n' 

2201 msg += ' * you have not set the CASTEP_COMMAND to call it \n' 

2202 msg += ' * you have provided a wrong CASTEP_COMMAND. \n' 

2203 msg += ' Make sure it is in your PATH\n\n' 

2204 msg += stdout 

2205 msg += stderr 

2206 raise CastepVersionError(msg) 

2207 if 'CASTEP version' in stdout: 

2208 output_txt = stdout.split('\n') 

2209 version_re = re.compile(r'CASTEP version:\s*([0-9\.]*)') 

2210 else: 

2211 with open(os.path.join(temp_dir, f'{jname}.castep')) as output: 

2212 output_txt = output.readlines() 

2213 version_re = re.compile(r'(?<=CASTEP version )[0-9.]*') 

2214 # shutil.rmtree(temp_dir) 

2215 for line in output_txt: 

2216 if 'CASTEP version' in line: 

2217 try: 

2218 return float(version_re.findall(line)[0]) 

2219 except ValueError: 

2220 # Fallback for buggy --version on CASTEP 16.0, 16.1 

2221 return fallback_version 

2222 

2223 

2224def create_castep_keywords(castep_command, filename='castep_keywords.json', 

2225 force_write=True, path='.', fetch_only=None): 

2226 """This function allows to fetch all available keywords from stdout 

2227 of an installed castep binary. It furthermore collects the documentation 

2228 to harness the power of (ipython) inspection and type for some basic 

2229 type checking of input. All information is stored in a JSON file that is 

2230 not distributed by default to avoid breaking the license of CASTEP. 

2231 """ 

2232 # Takes a while ... 

2233 # Fetch all allowed parameters 

2234 # fetch_only : only fetch that many parameters (for testsuite only) 

2235 suffixes = ['cell', 'param'] 

2236 

2237 filepath = os.path.join(path, filename) 

2238 

2239 if os.path.exists(filepath) and not force_write: 

2240 warnings.warn('CASTEP Options Module file exists. ' 

2241 'You can overwrite it by calling ' 

2242 'python castep.py -f [CASTEP_COMMAND].') 

2243 return False 

2244 

2245 # Not saving directly to file her to prevent half-generated files 

2246 # which will cause problems on future runs 

2247 

2248 castep_version = get_castep_version(castep_command) 

2249 

2250 help_all, _ = shell_stdouterr(f'{castep_command} -help all') 

2251 

2252 # Filter out proper keywords 

2253 try: 

2254 # The old pattern does not math properly as in CASTEP as of v8.0 there 

2255 # are some keywords for the semi-empircal dispersion correction (SEDC) 

2256 # which also include numbers. 

2257 if castep_version < 7.0: 

2258 pattern = r'((?<=^ )[A-Z_]{2,}|(?<=^)[A-Z_]{2,})' 

2259 else: 

2260 pattern = r'((?<=^ )[A-Z_\d]{2,}|(?<=^)[A-Z_\d]{2,})' 

2261 

2262 raw_options = re.findall(pattern, help_all, re.MULTILINE) 

2263 except Exception: 

2264 warnings.warn(f'Problem parsing: {help_all}') 

2265 raise 

2266 

2267 types = set() 

2268 levels = set() 

2269 

2270 processed_n = 0 

2271 to_process = len(raw_options[:fetch_only]) 

2272 

2273 processed_options = {sf: {} for sf in suffixes} 

2274 

2275 for o_i, option in enumerate(raw_options[:fetch_only]): 

2276 doc, _ = shell_stdouterr(f'{castep_command} -help {option}') 

2277 

2278 # Stand Back! I know regular expressions (http://xkcd.com/208/) :-) 

2279 match = re.match(r'(?P<before_type>.*)Type: (?P<type>.+?)\s+' 

2280 + r'Level: (?P<level>[^ ]+)\n\s*\n' 

2281 + r'(?P<doc>.*?)(\n\s*\n|$)', doc, re.DOTALL) 

2282 

2283 processed_n += 1 

2284 

2285 if match is not None: 

2286 match = match.groupdict() 

2287 

2288 # JM: uncomment lines in following block to debug issues 

2289 # with keyword assignment during extraction process from CASTEP 

2290 suffix = None 

2291 if re.findall(r'PARAMETERS keywords:\n\n\s?None found', doc): 

2292 suffix = 'cell' 

2293 if re.findall(r'CELL keywords:\n\n\s?None found', doc): 

2294 suffix = 'param' 

2295 if suffix is None: 

2296 warnings.warn('%s -> not assigned to either' 

2297 ' CELL or PARAMETERS keywords' % option) 

2298 

2299 option = option.lower() 

2300 mtyp = match.get('type', None) 

2301 mlvl = match.get('level', None) 

2302 mdoc = match.get('doc', None) 

2303 

2304 if mtyp is None: 

2305 warnings.warn(f'Found no type for {option}') 

2306 continue 

2307 if mlvl is None: 

2308 warnings.warn(f'Found no level for {option}') 

2309 continue 

2310 if mdoc is None: 

2311 warnings.warn(f'Found no doc string for {option}') 

2312 continue 

2313 

2314 types = types.union([mtyp]) 

2315 levels = levels.union([mlvl]) 

2316 

2317 processed_options[suffix][option] = { 

2318 'keyword': option, 

2319 'option_type': mtyp, 

2320 'level': mlvl, 

2321 'docstring': mdoc 

2322 } 

2323 

2324 processed_n += 1 

2325 

2326 frac = (o_i + 1.0) / to_process 

2327 sys.stdout.write('\rProcessed: [{}] {:>3.0f}%'.format( 

2328 '#' * int(frac * 20) + ' ' 

2329 * (20 - int(frac * 20)), 

2330 100 * frac)) 

2331 sys.stdout.flush() 

2332 

2333 else: 

2334 warnings.warn(f'create_castep_keywords: Could not process {option}') 

2335 

2336 sys.stdout.write('\n') 

2337 sys.stdout.flush() 

2338 

2339 processed_options['types'] = list(types) 

2340 processed_options['levels'] = list(levels) 

2341 processed_options['castep_version'] = castep_version 

2342 

2343 json.dump(processed_options, open(filepath, 'w'), indent=4) 

2344 

2345 warnings.warn(f'CASTEP v{castep_version}, fetched {processed_n} keywords') 

2346 return True 

2347 

2348 

2349CastepKeywords = namedtuple('CastepKeywords', 

2350 ['CastepParamDict', 'CastepCellDict', 

2351 'types', 'levels', 'castep_version']) 

2352 

2353# We keep this just for naming consistency with older versions 

2354 

2355 

2356def make_cell_dict(data=None): 

2357 from ase.io.castep.castep_input_file import CastepOptionDict 

2358 

2359 data = data if data is not None else {} 

2360 

2361 class CastepCellDict(CastepOptionDict): 

2362 def __init__(self): 

2363 CastepOptionDict.__init__(self, data) 

2364 

2365 return CastepCellDict 

2366 

2367 

2368def make_param_dict(data=None): 

2369 from ase.io.castep.castep_input_file import CastepOptionDict 

2370 

2371 data = data if data is not None else {} 

2372 

2373 class CastepParamDict(CastepOptionDict): 

2374 def __init__(self): 

2375 CastepOptionDict.__init__(self, data) 

2376 

2377 return CastepParamDict 

2378 

2379 

2380class CastepVersionError(Exception): 

2381 """No special behaviour, works to signal when Castep can not be found""" 

2382 

2383 

2384def get_castep_pp_path(castep_pp_path=''): 

2385 """Abstract the quest for a CASTEP PSP directory.""" 

2386 if castep_pp_path: 

2387 return os.path.abspath(os.path.expanduser(castep_pp_path)) 

2388 elif 'PSPOT_DIR' in cfg: 

2389 return cfg['PSPOT_DIR'] 

2390 elif 'CASTEP_PP_PATH' in cfg: 

2391 return cfg['CASTEP_PP_PATH'] 

2392 else: 

2393 return os.path.abspath('.') 

2394 

2395 

2396def get_castep_command(castep_command=''): 

2397 """Abstract the quest for a castep_command string.""" 

2398 if castep_command: 

2399 return castep_command 

2400 elif 'CASTEP_COMMAND' in cfg: 

2401 return cfg['CASTEP_COMMAND'] 

2402 else: 

2403 return 'castep' 

2404 

2405 

2406def shell_stdouterr(raw_command, cwd=None): 

2407 """Abstracts the standard call of the commandline, when 

2408 we are only interested in the stdout and stderr 

2409 """ 

2410 stdout, stderr = subprocess.Popen(raw_command, 

2411 stdout=subprocess.PIPE, 

2412 stderr=subprocess.PIPE, 

2413 universal_newlines=True, 

2414 shell=True, cwd=cwd).communicate() 

2415 return stdout.strip(), stderr.strip() 

2416 

2417 

2418def import_castep_keywords(castep_command='', 

2419 filename='castep_keywords.json', 

2420 path='.'): 

2421 """Search for castep keywords JSON in multiple paths""" 

2422 

2423 config_paths = ('~/.ase', '~/.config/ase') 

2424 searchpaths = [path] + [os.path.expanduser(config_path) 

2425 for config_path in config_paths] 

2426 try: 

2427 keywords_file = sum( 

2428 (glob.glob(os.path.join(sp, filename)) for sp in searchpaths), [] 

2429 )[0] 

2430 except IndexError: 

2431 warnings.warn("""Generating CASTEP keywords JSON file... hang on. 

2432 The CASTEP keywords JSON file contains abstractions for CASTEP input 

2433 parameters (for both .cell and .param input files), including some 

2434 format checks and descriptions. The latter are extracted from the 

2435 internal online help facility of a CASTEP binary, thus allowing to 

2436 easily keep the calculator synchronized with (different versions of) 

2437 the CASTEP code. Consequently, avoiding licensing issues (CASTEP is 

2438 distributed commercially by Biovia), we consider it wise not to 

2439 provide the file in the first place.""") 

2440 create_castep_keywords(get_castep_command(castep_command), 

2441 filename=filename, path=path) 

2442 keywords_file = Path(path).absolute() / filename 

2443 

2444 warnings.warn( 

2445 f'Stored castep keywords dictionary as {keywords_file}. ' 

2446 f'Copy it to {Path(config_paths[0]).expanduser() / filename} for ' 

2447 r'user installation.') 

2448 

2449 # Now create the castep_keywords object proper 

2450 with open(keywords_file) as fd: 

2451 kwdata = json.load(fd) 

2452 

2453 # This is a bit awkward, but it's necessary for backwards compatibility 

2454 param_dict = make_param_dict(kwdata['param']) 

2455 cell_dict = make_cell_dict(kwdata['cell']) 

2456 

2457 castep_keywords = CastepKeywords(param_dict, cell_dict, 

2458 kwdata['types'], kwdata['levels'], 

2459 kwdata['castep_version']) 

2460 

2461 return castep_keywords 

2462 

2463 

2464if __name__ == '__main__': 

2465 warnings.warn( 

2466 'When called directly this calculator will fetch all available ' 

2467 'keywords from the binarys help function into a ' 

2468 'castep_keywords.json in the current directory %s ' 

2469 'For system wide usage, it can be copied into an ase installation ' 

2470 'at ASE/calculators. ' 

2471 'This castep_keywords.json usually only needs to be generated once ' 

2472 'for a CASTEP binary/CASTEP version.' % os.getcwd()) 

2473 

2474 import optparse 

2475 parser = optparse.OptionParser() 

2476 parser.add_option( 

2477 '-f', '--force-write', dest='force_write', 

2478 help='Force overwriting existing castep_keywords.json', default=False, 

2479 action='store_true') 

2480 (options, args) = parser.parse_args() 

2481 

2482 if args: 

2483 opt_castep_command = ''.join(args) 

2484 else: 

2485 opt_castep_command = '' 

2486 generated = create_castep_keywords(get_castep_command(opt_castep_command), 

2487 force_write=options.force_write) 

2488 

2489 if generated: 

2490 try: 

2491 with open('castep_keywords.json') as fd: 

2492 json.load(fd) 

2493 except Exception as e: 

2494 warnings.warn( 

2495 f'{e} Ooops, something went wrong with the CASTEP keywords') 

2496 else: 

2497 warnings.warn('Import works. Looking good!')