Coverage for /builds/hweiske/ase/ase/calculators/castep.py: 49.26%

1"""This module defines an interface to CASTEP for

2 use by the ASE (Webpage: http://wiki.fysik.dtu.dk/ase)

4Authors:

5 Max Hoffmann, max.hoffmann@ch.tum.de

6 Joerg Meyer, joerg.meyer@ch.tum.de

7 Simon P. Rittmeyer, simon.rittmeyer@tum.de

9Contributors:

10 Juan M. Lorenzi, juan.lorenzi@tum.de

11 Georg S. Michelitsch, georg.michelitsch@tch.tum.de

12 Reinhard J. Maurer, reinhard.maurer@yale.edu

13 Simone Sturniolo, simone.sturniolo@stfc.ac.uk

14"""

16import difflib

17import glob

18import io

19import json

20import os

21import re

22import shutil

23import subprocess

24import sys

25import tempfile

26import time

27import warnings

28from collections import defaultdict, namedtuple

29from copy import deepcopy

30from itertools import product

31from pathlib import Path

32from typing import Any, Dict, List, Optional

34import numpy as np

36from ase import Atoms, units

37from ase.calculators.calculator import (BaseCalculator, compare_atoms,

38 kpts2sizeandoffsets)

39from ase.config import cfg

40from ase.constraints import FixAtoms, FixCartesian, FixConstraint

41from ase.dft.kpoints import BandPath

42from ase.io.castep import read_bands, read_param

43from ase.io.castep.castep_input_file import CastepCell, CastepParam

44from ase.parallel import paropen

46__all__ = [

47 'Castep',

48 'CastepCell',

49 'CastepParam',

50 'create_castep_keywords']

52# A convenient table to avoid the previously used "eval"

53_tf_table = {

54 '': True, # Just the keyword is equivalent to True

55 'True': True,

56 'False': False}

59def _self_getter(getf):

60 # A decorator that makes it so that if no 'atoms' argument is passed to a

61 # getter function, self.atoms is used instead

63 def decor_getf(self, atoms=None, *args, **kwargs):

65 if atoms is None:

66 atoms = self.atoms

68 return getf(self, atoms, *args, **kwargs)

70 return decor_getf

73class Castep(BaseCalculator):

74 r"""

75CASTEP Interface Documentation

78Introduction

79============

81CASTEP_ [1]_ W_ is a software package which uses density functional theory to

82provide a good atomic-level description of all manner of materials and

83molecules. CASTEP can give information about total energies, forces and

84stresses on an atomic system, as well as calculating optimum geometries, band

85structures, optical spectra, phonon spectra and much more. It can also perform

86molecular dynamics simulations.

88The CASTEP calculator interface class offers intuitive access to all CASTEP

89settings and most results. All CASTEP specific settings are accessible via

90attribute access (*i.e*. ``calc.param.keyword = ...`` or

91``calc.cell.keyword = ...``)

94Getting Started:

95================

97Set the environment variables appropriately for your system::

99 export CASTEP_COMMAND=' ... '

100 export CASTEP_PP_PATH=' ... '

101

102Note: alternatively to CASTEP_PP_PATH one can set PSPOT_DIR

103as CASTEP consults this by default, i.e.::

104

105 export PSPOT_DIR=' ... '

106

107

108Running the Calculator

109======================

110

111The default initialization command for the CASTEP calculator is

112

113.. class:: Castep(directory='CASTEP', label='castep')

114

115To do a minimal run one only needs to set atoms, this will use all

116default settings of CASTEP, meaning LDA, singlepoint, etc..

117

118With a generated *castep_keywords.json* in place all options are accessible

119by inspection, *i.e.* tab-completion. This works best when using ``ipython``.

120All options can be accessed via ``calc.param.<TAB>`` or ``calc.cell.<TAB>``

121and documentation is printed with ``calc.param.<keyword> ?`` or

122``calc.cell.<keyword> ?``. All options can also be set directly

123using ``calc.keyword = ...`` or ``calc.KEYWORD = ...`` or even

124``ialc.KeYwOrD`` or directly as named arguments in the call to the constructor

125(*e.g.* ``Castep(task='GeometryOptimization')``).

126If using this calculator on a machine without CASTEP, one might choose to copy

127a *castep_keywords.json* file generated elsewhere in order to access this

128feature: the file will be used if located in the working directory,

129*$HOME/.ase/* or *ase/ase/calculators/* within the ASE library. The file should

130be generated the first time it is needed, but you can generate a new keywords

131file in the currect directory with ``python -m ase.calculators.castep``.

132

133All options that go into the ``.param`` file are held in an ``CastepParam``

134instance, while all options that go into the ``.cell`` file and don't belong

135to the atoms object are held in an ``CastepCell`` instance. Each instance can

136be created individually and can be added to calculators by attribute

137assignment, *i.e.* ``calc.param = param`` or ``calc.cell = cell``.

138

139All internal variables of the calculator start with an underscore (_).

140All cell attributes that clearly belong into the atoms object are blocked.

141Setting ``calc.atoms_attribute`` (*e.g.* ``= positions``) is sent directly to

142the atoms object.

143

144

145Arguments:

146==========

147

148========================= ====================================================

149Keyword Description

150========================= ====================================================

151``directory`` The relative path where all input and output files

152 will be placed. If this does not exist, it will be

153 created. Existing directories will be moved to

154 directory-TIMESTAMP unless self._rename_existing_dir

155 is set to false.

156

157``label`` The prefix of .param, .cell, .castep, etc. files.

158

159``castep_command`` Command to run castep. Can also be set via the bash

160 environment variable ``CASTEP_COMMAND``. If none is

161 given or found, will default to ``castep``

162

163``check_castep_version`` Boolean whether to check if the installed castep

164 version matches the version from which the available

165 options were deduced. Defaults to ``False``.

166

167``castep_pp_path`` The path where the pseudopotentials are stored. Can

168 also be set via the bash environment variables

169 ``PSPOT_DIR`` (preferred) and ``CASTEP_PP_PATH``.

170 Will default to the current working directory if

171 none is given or found. Note that pseudopotentials

172 may be generated on-the-fly if they are not found.

173

174``find_pspots`` Boolean whether to search for pseudopotentials in

175 ``<castep_pp_path>`` or not. If activated, files in

176 this directory will be checked for typical names. If

177 files are not found, they will be generated on the

178 fly, depending on the ``_build_missing_pspots``

179 value. A RuntimeError will be raised in case

180 multiple files per element are found. Defaults to

181 ``False``.

182``keyword_tolerance`` Integer to indicate the level of tolerance to apply

183 validation of any parameters set in the CastepCell

184 or CastepParam objects against the ones found in

185 castep_keywords. Levels are as following:

186

187 0 = no tolerance, keywords not found in

188 castep_keywords will raise an exception

189

190 1 = keywords not found will be accepted but produce

191 a warning (default)

192

193 2 = keywords not found will be accepted silently

194

195 3 = no attempt is made to look for

196 castep_keywords.json at all

197``castep_keywords`` Can be used to pass a CastepKeywords object that is

198 then used with no attempt to actually load a

199 castep_keywords.json file. Most useful for debugging

200 and testing purposes.

201

202========================= ====================================================

203

204

205Additional Settings

206===================

207

208========================= ====================================================

209Internal Setting Description

210========================= ====================================================

211``_castep_command`` (``=castep``): the actual shell command used to

212 call CASTEP.

213

214``_check_checkfile`` (``=True``): this makes write_param() only

215 write a continue or reuse statement if the

216 addressed .check or .castep_bin file exists in the

217 directory.

218

219``_copy_pspots`` (``=False``): if set to True the calculator will

220 actually copy the needed pseudo-potential (\*.usp)

221 file, usually it will only create symlinks.

222

223``_link_pspots`` (``=True``): if set to True the calculator will

224 actually will create symlinks to the needed pseudo

225 potentials. Set this option (and ``_copy_pspots``)

226 to False if you rather want to access your pseudo

227 potentials using the PSPOT_DIR environment variable

228 that is read by CASTEP.

229 *Note:* This option has no effect if ``copy_pspots``

230 is True..

231

232``_build_missing_pspots`` (``=True``): if set to True, castep will generate

233 missing pseudopotentials on the fly. If not, a

234 RuntimeError will be raised if not all files were

235 found.

236

237``_export_settings`` (``=True``): if this is set to

238 True, all calculator internal settings shown here

239 will be included in the .param in a comment line (#)

240 and can be read again by merge_param. merge_param

241 can be forced to ignore this directive using the

242 optional argument ``ignore_internal_keys=True``.

243

244``_force_write`` (``=True``): this controls wether the \*cell and

245 \*param will be overwritten.

246

247``_prepare_input_only`` (``=False``): If set to True, the calculator will

248 create \*cell und \*param file but not

249 start the calculation itself.

250 If this is used to prepare jobs locally

251 and run on a remote cluster it is recommended

252 to set ``_copy_pspots = True``.

253

254``_castep_pp_path`` (``='.'``) : the place where the calculator

255 will look for pseudo-potential files.

256

257``_find_pspots`` (``=False``): if set to True, the calculator will

258 try to find the respective pseudopotentials from

259 <_castep_pp_path>. As long as there are no multiple

260 files per element in this directory, the auto-detect

261 feature should be very robust. Raises a RuntimeError

262 if required files are not unique (multiple files per

263 element). Non existing pseudopotentials will be

264 generated, though this could be dangerous.

265

266``_rename_existing_dir`` (``=True``) : when using a new instance

267 of the calculator, this will move directories out of

268 the way that would be overwritten otherwise,

269 appending a date string.

270

271``_set_atoms`` (``=False``) : setting this to True will overwrite

272 any atoms object previously attached to the

273 calculator when reading a \.castep file. By de-

274 fault, the read() function will only create a new

275 atoms object if none has been attached and other-

276 wise try to assign forces etc. based on the atom's

277 positions. ``_set_atoms=True`` could be necessary

278 if one uses CASTEP's internal geometry optimization

279 (``calc.param.task='GeometryOptimization'``)

280 because then the positions get out of sync.

281 *Warning*: this option is generally not recommended

282 unless one knows one really needs it. There should

283 never be any need, if CASTEP is used as a

284 single-point calculator.

285

286``_track_output`` (``=False``) : if set to true, the interface

287 will append a number to the label on all input

288 and output files, where n is the number of calls

289 to this instance. *Warning*: this setting may con-

290 sume a lot more disk space because of the additio-

291 nal \*check files.

292

293``_try_reuse`` (``=_track_output``) : when setting this, the in-

294 terface will try to fetch the reuse file from the

295 previous run even if _track_output is True. By de-

296 fault it is equal to _track_output, but may be

297 overridden.

298

299 Since this behavior may not always be desirable for

300 single-point calculations. Regular reuse for *e.g.*

301 a geometry-optimization can be achieved by setting

302 ``calc.param.reuse = True``.

303

304``_pedantic`` (``=False``) if set to true, the calculator will

305 inform about settings probably wasting a lot of CPU

306 time or causing numerical inconsistencies.

307

308========================= ====================================================

309

310Special features:

311=================

312

313

314``.dryrun_ok()``

315 Runs ``castep_command seed -dryrun`` in a temporary directory return True if

316 all variables initialized ok. This is a fast way to catch errors in the

317 input. Afterwards _kpoints_used is set.

318

319``.merge_param()``

320 Takes a filename or filehandler of a .param file or CastepParam instance and

321 merges it into the current calculator instance, overwriting current settings

322

323``.keyword.clear()``

324 Can be used on any option like ``calc.param.keyword.clear()`` or

325 ``calc.cell.keyword.clear()`` to return to the CASTEP default.

326

327``.initialize()``

328 Creates all needed input in the ``_directory``. This can then copied to and

329 run in a place without ASE or even python.

330

331``.set_pspot('<library>')``

332 This automatically sets the pseudo-potential for all present species to

333 ``<Species>_<library>.usp``. Make sure that ``_castep_pp_path`` is set

334 correctly. Note that there is no check, if the file actually exists. If it

335 doesn't castep will crash! You may want to use ``find_pspots()`` instead.

336

337``.find_pspots(pspot=<library>, suffix=<suffix>)``

338 This automatically searches for pseudopotentials of type

339 ``<Species>_<library>.<suffix>`` or ``<Species>-<library>.<suffix>`` in

340 ``castep_pp_path` (make sure this is set correctly). Note that ``<Species>``

341 will be searched for case insensitive. Regular expressions are accepted, and

342 arguments ``'*'`` will be regarded as bash-like wildcards. Defaults are any

343 ``<library>`` and any ``<suffix>`` from ``['usp', 'UPF', 'recpot']``. If you

344 have well-organized folders with pseudopotentials of one kind, this should

345 work with the defaults.

346

347``print(calc)``

348 Prints a short summary of the calculator settings and atoms.

349

350``ase.io.castep.read_seed('path-to/seed')``

351 Given you have a combination of seed.{param,cell,castep} this will return an

352 atoms object with the last ionic positions in the .castep file and all other

353 settings parsed from the .cell and .param file. If no .castep file is found

354 the positions are taken from the .cell file. The output directory will be

355 set to the same directory, only the label is preceded by 'copy_of\_' to

356 avoid overwriting.

357

358``.set_kpts(kpoints)``

359 This is equivalent to initialising the calculator with

360 ``calc = Castep(kpts=kpoints)``. ``kpoints`` can be specified in many

361 convenient forms: simple Monkhorst-Pack grids can be specified e.g.

362 ``(2, 2, 3)`` or ``'2 2 3'``; lists of specific weighted k-points can be

363 given in reciprocal lattice coordinates e.g.

364 ``[[0, 0, 0, 0.25], [0.25, 0.25, 0.25, 0.75]]``; a dictionary syntax is

365 available for more complex requirements e.g.

366 ``{'size': (2, 2, 2), 'gamma': True}`` will give a Gamma-centered 2x2x2 M-P

367 grid, ``{'density': 10, 'gamma': False, 'even': False}`` will give a mesh

368 with density of at least 10 Ang (based on the unit cell of currently-attached

369 atoms) with an odd number of points in each direction and avoiding the Gamma

370 point.

371

372``.set_bandpath(bandpath)``

373 This is equivalent to initialialising the calculator with

374 ``calc=Castep(bandpath=bandpath)`` and may be set simultaneously with *kpts*.

375 It allows an electronic band structure path to be set up using ASE BandPath

376 objects. This enables a band structure calculation to be set up conveniently

377 using e.g. calc.set_bandpath(atoms.cell.bandpath().interpolate(npoints=200))

378

379``.band_structure(bandfile=None)``

380 Read a band structure from _seedname.bands_ file. This returns an ase

381 BandStructure object which may be plotted with e.g.

382 ``calc.band_structure().plot()``

383

384Notes/Issues:

385==============

386

387* Currently *only* the FixAtoms *constraint* is fully supported for reading and

388 writing. There is some experimental support for the FixCartesian constraint.

389

390* There is no support for the CASTEP *unit system*. Units of eV and Angstrom

391 are used throughout. In particular when converting total energies from

392 different calculators, one should check that the same CODATA_ version is

393 used for constants and conversion factors, respectively.

394

395.. _CASTEP: http://www.castep.org/

396

397.. _W: https://en.wikipedia.org/wiki/CASTEP

398

399.. _CODATA: https://physics.nist.gov/cuu/Constants/index.html

400

401.. [1] S. J. Clark, M. D. Segall, C. J. Pickard, P. J. Hasnip, M. J. Probert,

402 K. Refson, M. C. Payne Zeitschrift für Kristallographie 220(5-6)

403 pp.567- 570 (2005) PDF_.

404

405.. _PDF: http://www.tcm.phy.cam.ac.uk/castep/papers/ZKristallogr_2005.pdf

406

407

408End CASTEP Interface Documentation

409 """

410

411 # Class attributes !

412 # keys set through atoms object

413 atoms_keys = [

414 'charges',

415 'ionic_constraints',

416 'lattice_abs',

417 'lattice_cart',

418 'positions_abs',

419 'positions_abs_final',

420 'positions_abs_intermediate',

421 'positions_frac',

422 'positions_frac_final',

423 'positions_frac_intermediate']

424

425 atoms_obj_keys = [

426 'dipole',

427 'energy_free',

428 'energy_zero',

429 'fermi',

430 'forces',

431 'nbands',

432 'positions',

433 'stress',

434 'pressure']

435

436 internal_keys = [

437 '_castep_command',

438 '_check_checkfile',

439 '_copy_pspots',

440 '_link_pspots',

441 '_find_pspots',

442 '_build_missing_pspots',

443 '_directory',

444 '_export_settings',

445 '_force_write',

446 '_label',

447 '_prepare_input_only',

448 '_castep_pp_path',

449 '_rename_existing_dir',

450 '_set_atoms',

451 '_track_output',

452 '_try_reuse',

453 '_pedantic']

454

455 implemented_properties = [

456 'energy',

457 'free_energy',

458 'forces',

459 'stress',

460 'charges',

461 'magmoms',

462 ]

463

464 # specific to this calculator

465 implemented_properties += [

466 'energy_without_dispersion_correction',

467 'free_energy_without_dispersion_correction',

468 'energy_zero_without_dispersion_correction',

469 'energy_with_dispersion_correction',

470 'free_energy_with_dispersion_correction',

471 'energy_zero_with_dispersion_correction',

472 'energy_with_finite_basis_set_correction',

473 'pressure',

474 'hirshfeld_volume_ratios',

475 'hirshfeld_charges',

476 'hirshfeld_magmoms',

477 ]

478

479 def __init__(self, directory='CASTEP', label='castep',

480 castep_command=None, check_castep_version=False,

481 castep_pp_path=None, find_pspots=False, keyword_tolerance=1,

482 castep_keywords=None, **kwargs):

483

484 self.results = {}

485

486 from ase.io.castep import write_cell

487 self._write_cell = write_cell

488

489 if castep_keywords is None:

490 castep_keywords = CastepKeywords(make_param_dict(),

491 make_cell_dict(),

492 [],

493 [],

494 0)

495 if keyword_tolerance < 3:

496 try:

497 castep_keywords = import_castep_keywords(castep_command)

498 except CastepVersionError as e:

499 if keyword_tolerance == 0:

500 raise e

501 else:

502 warnings.warn(str(e))

503

504 self._kw_tol = keyword_tolerance

505 keyword_tolerance = max(keyword_tolerance, 2) # 3 not accepted below

506 self.param = CastepParam(castep_keywords,

507 keyword_tolerance=keyword_tolerance)

508 self.cell = CastepCell(castep_keywords,

509 keyword_tolerance=keyword_tolerance)

510

511 ###################################

512 # Calculator state variables #

513 ###################################

514 self._calls = 0

515 self._castep_version = castep_keywords.castep_version

516

517 # collects warning from .castep files

518 self._warnings = []

519 # collects content from *.err file

520 self._error = None

521 # warnings raised by the ASE interface

522 self._interface_warnings = []

523

524 # store to check if recalculation is necessary

525 self._old_atoms = None

526 self._old_cell = None

527 self._old_param = None

528

529 ###################################

530 # Internal keys #

531 # Allow to tweak the behavior #

532 ###################################

533 self._opt = {}

534 self._castep_command = get_castep_command(castep_command)

535 self._castep_pp_path = get_castep_pp_path(castep_pp_path)

536 self._check_checkfile = True

537 self._copy_pspots = False

538 self._link_pspots = True

539 self._find_pspots = find_pspots

540 self._build_missing_pspots = True

541 self._directory = os.path.abspath(directory)

542 self._export_settings = True

543 self._force_write = True

544 self._label = label

545 self._prepare_input_only = False

546 self._rename_existing_dir = True

547 self._set_atoms = False

548 self._track_output = False

549 self._try_reuse = False

550

551 # turn off the pedantic user warnings

552 self._pedantic = False

553

554 # will be set on during runtime

555 self._seed = None

556

557 ###################################

558 # (Physical) result variables #

559 ###################################

560 self.atoms = None

561 # initialize result variables

562 self._eigenvalues = None

563 self._efermi = None

564 self._ibz_kpts = None

565 self._ibz_weights = None

566 self._band_structure = None

567

568 self._number_of_cell_constraints = None

569 self._output_verbosity = None

570 self._unit_cell = None

571 self._kpoints = None

572

573 # pointers to other files used at runtime

574 self._check_file = None

575 self._castep_bin_file = None

576

577 # plane wave cutoff energy (may be derived during PP generation)

578 self._cut_off_energy = None

579

580 # runtime information

581 self._total_time = None

582 self._peak_memory = None

583

584 # check version of CASTEP options module against current one

585 if check_castep_version:

586 local_castep_version = get_castep_version(self._castep_command)

587 if not hasattr(self, '_castep_version'):

588 warnings.warn('No castep version found')

589 return

590 if local_castep_version != self._castep_version:

591 warnings.warn(

592 'The options module was generated from version %s '

593 'while your are currently using CASTEP version %s' %

594 (self._castep_version,

595 get_castep_version(self._castep_command)))

596 self._castep_version = local_castep_version

597

598 # processes optional arguments in kw style

599 for keyword, value in kwargs.items():

600 # first fetch special keywords issued by ASE CLI

601 if keyword == 'kpts':

602 self.set_kpts(value)

603 elif keyword == 'bandpath':

604 self.set_bandpath(value)

605 elif keyword == 'xc':

606 self.xc_functional = value

607 elif keyword == 'ecut':

608 self.cut_off_energy = value

609 else: # the general case

610 self.__setattr__(keyword, value)

611

612 # TODO: to be self.use_cache = True after revising `__setattr__`

613 self.__dict__['use_cache'] = True

614

615 def set_atoms(self, atoms):

616 self.atoms = atoms

617

618 def get_atoms(self):

619 if self.atoms is None:

620 raise ValueError('Calculator has no atoms')

621 atoms = self.atoms.copy()

622 atoms.calc = self

623 return atoms

624

625 def _get_name(self) -> str:

626 return self.__class__.__name__

627

628 def band_structure(self, bandfile=None):

629 from ase.spectrum.band_structure import BandStructure

630

631 if bandfile is None:

632 bandfile = os.path.join(self._directory, self._seed) + '.bands'

633

634 if not os.path.exists(bandfile):

635 raise ValueError(f'Cannot find band file "{bandfile}".')

636

637 kpts, weights, eigenvalues, efermi = read_bands(bandfile)

638

639 # Get definitions of high-symmetry points

640 special_points = self.atoms.cell.bandpath(npoints=0).special_points

641 bandpath = BandPath(self.atoms.cell,

642 kpts=kpts,

643 special_points=special_points)

644 return BandStructure(bandpath, eigenvalues, reference=efermi)

645

646 def set_bandpath(self, bandpath):

647 """Set a band structure path from ase.dft.kpoints.BandPath object

648

649 This will set the bs_kpoint_list block with a set of specific points

650 determined in ASE. bs_kpoint_spacing will not be used; to modify the

651 number of points, consider using e.g. bandpath.resample(density=20) to

652 obtain a new dense path.

653

654 Args:

655 bandpath (:obj:`ase.dft.kpoints.BandPath` or None):

656 Set to None to remove list of band structure points. Otherwise,

657 sampling will follow BandPath parameters.

658

659 """

660

661 def clear_bs_keywords():

662 bs_keywords = product({'bs_kpoint', 'bs_kpoints'},

663 {'path', 'path_spacing',

664 'list',

665 'mp_grid', 'mp_spacing', 'mp_offset'})

666 for bs_tag in bs_keywords:

667 setattr(self.cell, '_'.join(bs_tag), None)

668

669 if bandpath is None:

670 clear_bs_keywords()

671 elif isinstance(bandpath, BandPath):

672 clear_bs_keywords()

673 self.cell.bs_kpoint_list = [' '.join(map(str, row))

674 for row in bandpath.kpts]

675 else:

676 raise TypeError('Band structure path must be an '

677 'ase.dft.kpoint.BandPath object')

678

679 def set_kpts(self, kpts):

680 """Set k-point mesh/path using a str, tuple or ASE features

681

682 Args:

683 kpts (None, tuple, str, dict):

684

685 This method will set the CASTEP parameters kpoints_mp_grid,

686 kpoints_mp_offset and kpoints_mp_spacing as appropriate. Unused

687 parameters will be set to None (i.e. not included in input files.)

688

689 If kpts=None, all these parameters are set as unused.

690

691 The simplest useful case is to give a 3-tuple of integers specifying

692 a Monkhorst-Pack grid. This may also be formatted as a string separated

693 by spaces; this is the format used internally before writing to the

694 input files.

695

696 A more powerful set of features is available when using a python

697 dictionary with the following allowed keys:

698

699 - 'size' (3-tuple of int) mesh of mesh dimensions

700 - 'density' (float) for BZ sampling density in points per recip. Ang

701 ( kpoint_mp_spacing = 1 / (2pi * density) ). An explicit MP mesh will

702 be set to allow for rounding/centering.

703 - 'spacing' (float) for BZ sampling density for maximum space between

704 sample points in reciprocal space. This is numerically equivalent to

705 the inbuilt ``calc.cell.kpoint_mp_spacing``, but will be converted to

706 'density' to allow for rounding/centering.

707 - 'even' (bool) to round each direction up to the nearest even number;

708 set False for odd numbers, leave as None for no odd/even rounding.

709 - 'gamma' (bool) to offset the Monkhorst-Pack grid to include

710 (0, 0, 0); set False to offset each direction avoiding 0.

711 """

712

713 def clear_mp_keywords():

714 mp_keywords = product({'kpoint', 'kpoints'},

715 {'mp_grid', 'mp_offset',

716 'mp_spacing', 'list'})

717 for kp_tag in mp_keywords:

718 setattr(self.cell, '_'.join(kp_tag), None)

719

720 # Case 1: Clear parameters with set_kpts(None)

721 if kpts is None:

722 clear_mp_keywords()

723

724 # Case 2: list of explicit k-points with weights

725 # e.g. [[ 0, 0, 0, 0.125],

726 # [ 0, -0.5, 0, 0.375],

727 # [-0.5, 0, -0.5, 0.375],

728 # [-0.5, -0.5, -0.5, 0.125]]

729

730 elif (isinstance(kpts, (tuple, list))

731 and isinstance(kpts[0], (tuple, list))):

732

733 if not all(map((lambda row: len(row) == 4), kpts)):

734 raise ValueError(

735 'In explicit kpt list each row should have 4 elements')

736

737 clear_mp_keywords()

738 self.cell.kpoint_list = [' '.join(map(str, row)) for row in kpts]

739

740 # Case 3: list of explicit kpts formatted as list of str

741 # i.e. the internal format of calc.kpoint_list split on \n

742 # e.g. ['0 0 0 0.125', '0 -0.5 0 0.375', '-0.5 0 -0.5 0.375']

743 elif isinstance(kpts, (tuple, list)) and isinstance(kpts[0], str):

744

745 if not all(map((lambda row: len(row.split()) == 4), kpts)):

746 raise ValueError(

747 'In explicit kpt list each row should have 4 elements')

748

749 clear_mp_keywords()

750 self.cell.kpoint_list = kpts

751

752 # Case 4: list or tuple of MP samples e.g. [3, 3, 2]

753 elif isinstance(kpts, (tuple, list)) and isinstance(kpts[0], int):

754 if len(kpts) != 3:

755 raise ValueError('Monkhorst-pack grid should have 3 values')

756 clear_mp_keywords()

757 self.cell.kpoint_mp_grid = '%d %d %d' % tuple(kpts)

758

759 # Case 5: str representation of Case 3 e.g. '3 3 2'

760 elif isinstance(kpts, str):

761 self.set_kpts([int(x) for x in kpts.split()])

762

763 # Case 6: dict of options e.g. {'size': (3, 3, 2), 'gamma': True}

764 # 'spacing' is allowed but transformed to 'density' to get mesh/offset

765 elif isinstance(kpts, dict):

766 kpts = kpts.copy()

767

768 if (kpts.get('spacing') is not None

769 and kpts.get('density') is not None):

770 raise ValueError(

771 'Cannot set kpts spacing and density simultaneously.')

772 else:

773 if kpts.get('spacing') is not None:

774 kpts = kpts.copy()

775 spacing = kpts.pop('spacing')

776 kpts['density'] = 1 / (2 * np.pi * spacing)

777

778 clear_mp_keywords()

779 size, offsets = kpts2sizeandoffsets(atoms=self.atoms, **kpts)

780 self.cell.kpoint_mp_grid = '%d %d %d' % tuple(size)

781 self.cell.kpoint_mp_offset = '%f %f %f' % tuple(offsets)

782

783 # Case 7: some other iterator. Try treating as a list:

784 elif hasattr(kpts, '__iter__'):

785 self.set_kpts(list(kpts))

786

787 # Otherwise, give up

788 else:

789 raise TypeError('Cannot interpret kpts of this type')

790

791 def todict(self, skip_default=True):

792 """Create dict with settings of .param and .cell"""

793 dct = {}

794 dct['param'] = self.param.get_attr_dict()

795 dct['cell'] = self.cell.get_attr_dict()

796

797 return dct

798

799 def check_state(self, atoms, tol=1e-15):

800 """Check for system changes since last calculation."""

801 return compare_atoms(self._old_atoms, atoms)

802

803 def _castep_find_last_record(self, castep_file):

804 """Checks wether a given castep file has a regular

805 ending message following the last banner message. If this

806 is the case, the line number of the last banner is message

807 is return, otherwise False.

808

809 returns (record_start, record_end, end_found, last_record_complete)

810 """

811 if isinstance(castep_file, str):

812 castep_file = paropen(castep_file, 'r')

813 file_opened = True

814 else:

815 file_opened = False

816 record_starts = []

817 while True:

818 line = castep_file.readline()

819 if (('Welcome' in line or 'Materials Studio' in line)

820 and 'CASTEP' in line):

821 record_starts = [castep_file.tell()] + record_starts

822 if not line:

823 break

824

825 if record_starts == []:

826 warnings.warn('Could not find CASTEP label in result file: %s.'

827 ' Are you sure this is a .castep file?' % castep_file)

828 return None

829

830 # search for regular end of file

831 end_found = False

832 # start to search from record beginning from the back

833 # and see if

834 record_end = -1

835 for record_nr, record_start in enumerate(record_starts):

836 castep_file.seek(record_start)

837 while True:

838 line = castep_file.readline()

839 if not line:

840 break

841 if 'warn' in line.lower():

842 self._warnings.append(line)

843

844 if 'Finalisation time =' in line:

845 end_found = True

846 record_end = castep_file.tell()

847 break

848

849 if end_found:

850 break

851

852 if file_opened:

853 castep_file.close()

854

855 if end_found:

856 # record_nr == 0 corresponds to the last record here

857 if record_nr == 0:

858 return (record_start, record_end, True, True)

859 else:

860 return (record_start, record_end, True, False)

861 else:

862 return (0, record_end, False, False)

863

864 def read(self, castep_file=None):

865 """Read a castep file into the current instance."""

866

867 _close = True

868

869 if castep_file is None:

870 if self._castep_file:

871 castep_file = self._castep_file

872 out = paropen(castep_file, 'r')

873 else:

874 warnings.warn('No CASTEP file specified')

875 return

876 if not os.path.exists(castep_file):

877 warnings.warn('No CASTEP file found')

878

879 elif isinstance(castep_file, str):

880 out = paropen(castep_file, 'r')

881

882 else:

883 # in this case we assume that we have a fileobj already, but check

884 # for attributes in order to avoid extended EAFP blocks.

885 out = castep_file

886

887 # look before you leap...

888 attributes = ['name',

889 'seek',

890 'close',

891 'readline',

892 'tell']

893

894 for attr in attributes:

895 if not hasattr(out, attr):

896 raise TypeError(

897 '"castep_file" is neither str nor valid fileobj')

898

899 castep_file = out.name

900 _close = False

901

902 if self._seed is None:

903 self._seed = os.path.splitext(os.path.basename(castep_file))[0]

904

905 err_file = f'{self._seed}.0001.err'

906 if os.path.exists(err_file):

907 err_file = paropen(err_file)

908 self._error = err_file.read()

909 err_file.close()

910 # we return right-away because it might

911 # just be here from a previous run

912 # look for last result, if several CASTEP

913 # run are appended

914

915 record_start, record_end, end_found, _\

916 = self._castep_find_last_record(out)

917 if not end_found:

918 warnings.warn(

919 f'No regular end found in {castep_file} file. {self._error}')

920 if _close:

921 out.close()

922 return

923 # we return here, because the file has no a regular end

924

925 # now iterate over last CASTEP output in file to extract information

926 # could be generalized as well to extract trajectory from file

927 # holding several outputs

928 n_cell_const = 0

929

930 kpoints = None

931

932 out.seek(record_start)

933

934 # read header

935 parameters_header = _read_header(out)

936 if 'cut_off_energy' in parameters_header:

937 self._cut_off_energy = parameters_header['cut_off_energy']

938 if 'basis_precision' in parameters_header:

939 del parameters_header['cut_off_energy'] # avoid conflict

940 for k, v in parameters_header.items():

941 setattr(self.param, k, v)

942

943 while True:

944 # TODO: add a switch if we have a geometry optimization: record

945 # atoms objects for intermediate steps.

946 try:

947 line = out.readline()

948 if not line or out.tell() > record_end:

949 break

950 elif 'Number of kpoints used' in line:

951 kpoints = int(line.split('=')[-1].strip())

952 elif 'Unit Cell' in line:

953 lattice_real = _read_unit_cell(out)

954 elif 'Cell Contents' in line:

955 while True:

956 line = out.readline()

957 if 'Total number of ions in cell' in line:

958 n_atoms = int(line.split()[7])

959 if 'Total number of species in cell' in line:

960 int(line.split()[7])

961 fields = line.split()

962 if len(fields) == 0:

963 break

964 elif 'Fractional coordinates of atoms' in line:

965 species, custom_species, positions_frac = \

966 _read_fractional_coordinates(out, n_atoms)

967 elif 'Files used for pseudopotentials' in line:

968 while True:

969 line = out.readline()

970 if 'Pseudopotential generated on-the-fly' in line:

971 continue

972 fields = line.split()

973 if (len(fields) >= 2):

974 elem, pp_file = fields

975 self.cell.species_pot = (elem, pp_file)

976 else:

977 break

978 elif 'k-Points For BZ Sampling' in line:

979 # TODO: generalize for non-Monkhorst Pack case

980 # (i.e. kpoint lists) -

981 # kpoints_offset cannot be read this way and

982 # is hence always set to None

983 while True:

984 line = out.readline()

985 if not line.strip():

986 break

987 if 'MP grid size for SCF calculation' in line:

988 # kpoints = ' '.join(line.split()[-3:])

989 # self.kpoints_mp_grid = kpoints

990 # self.kpoints_mp_offset = '0. 0. 0.'

991 # not set here anymore because otherwise

992 # two calculator objects go out of sync

993 # after each calculation triggering unnecessary

994 # recalculation

995 break

996 elif 'Number of cell constraints' in line:

997 n_cell_const = int(line.split()[4])

998

999 elif 'Final energy' in line:

1000 key = 'energy_without_dispersion_correction'

1001 self.results[key] = float(line.split()[-2])

1002 elif 'Final free energy' in line:

1003 key = 'free_energy_without_dispersion_correction'

1004 self.results[key] = float(line.split()[-2])

1005 elif 'NB est. 0K energy' in line:

1006 key = 'energy_zero_without_dispersion_correction'

1007 self.results[key] = float(line.split()[-2])

1008

1009 # Add support for dispersion correction

1010 # filtering due to SEDC is done in get_potential_energy

1011 elif 'Dispersion corrected final energy' in line:

1012 key = 'energy_with_dispersion_correlation'

1013 self.results[key] = float(line.split()[-2])

1014 elif 'Dispersion corrected final free energy' in line:

1015 key = 'free_energy_with_dispersion_correlation'

1016 self.results[key] = float(line.split()[-2])

1017 elif 'NB dispersion corrected est. 0K energy' in line:

1018 key = 'energy_zero_with_dispersion_correlation'

1019 self.results[key] = float(line.split()[-2])

1020

1021 # check if we had a finite basis set correction

1022 elif 'Total energy corrected for finite basis set' in line:

1023 key = 'energy_with_finite_basis_set_correction'

1024 self.results[key] = float(line.split()[-2])

1025

1026 # ******************** Forces *********************

1027 # ************** Symmetrised Forces ***************

1028 # ******************** Constrained Forces ********************

1029 # ******************* Unconstrained Forces *******************

1030 elif re.search(r'\**.* Forces \**', line):

1031 forces, constraints = _read_forces(out, n_atoms)

1032 self.results['forces'] = np.array(forces)

1033

1034 # ***************** Stress Tensor *****************

1035 # *********** Symmetrised Stress Tensor ***********

1036 elif re.search(r'\**.* Stress Tensor \**', line):

1037 self.results.update(_read_stress(out))

1038

1039 elif ('BFGS: starting iteration' in line

1040 or 'BFGS: improving iteration' in line):

1041 if n_cell_const < 6:

1042 lattice_real = []

1043 # backup previous configuration first:

1044 # for highly symmetric systems (where essentially only the

1045 # stress is optimized, but the atomic positions) positions

1046 # are only printed once.

1047 if species:

1048 prev_species = deepcopy(species)

1049 if positions_frac:

1050 prev_positions_frac = deepcopy(positions_frac)

1051 species = []

1052 positions_frac = []

1053

1054 self.results = {}

1055

1056 # extract info from the Mulliken analysis

1057 elif 'Atomic Populations' in line:

1058 self.results.update(_read_mulliken_charges(out))

1059

1060 # extract detailed Hirshfeld analysis (iprint > 1)

1061 elif 'Hirshfeld total electronic charge (e)' in line:

1062 self.results.update(_read_hirshfeld_details(out, n_atoms))

1063

1064 elif 'Hirshfeld Analysis' in line:

1065 self.results.update(_read_hirshfeld_charges(out))

1066

1067 # There is actually no good reason to get out of the loop

1068 # already at this point... or do I miss something?

1069 # elif 'BFGS: Final Configuration:' in line:

1070 # break

1071 elif 'warn' in line.lower():

1072 self._warnings.append(line)

1073

1074 # fetch some last info

1075 elif 'Total time' in line:

1076 pattern = r'.*=\s*([\d\.]+) s'

1077 self._total_time = float(re.search(pattern, line).group(1))

1078

1079 elif 'Peak Memory Use' in line:

1080 pattern = r'.*=\s*([\d]+) kB'

1081 self._peak_memory = int(re.search(pattern, line).group(1))

1082

1083 except Exception as exception:

1084 sys.stderr.write(line + '|-> line triggered exception: '

1085 + str(exception))

1086 raise

1087

1088 if _close:

1089 out.close()

1090

1091 _set_energy_and_free_energy(self.results)

1092

1093 # in highly summetric crystals, positions and symmetry are only printed

1094 # upon init, hence we here restore these original values

1095 if not positions_frac:

1096 positions_frac = prev_positions_frac

1097 if not species:

1098 species = prev_species

1099

1100 positions_frac_atoms = np.array(positions_frac)

1101

1102 if self.atoms and not self._set_atoms:

1103 # compensate for internal reordering of atoms by CASTEP

1104 # using the fact that the order is kept within each species

1105

1106 indices = _get_indices_to_sort_back(self.atoms.symbols, species)

1107 positions_frac_atoms = positions_frac_atoms[indices]

1108 keys = [

1109 'forces',

1110 'charges',

1111 'magmoms',

1112 'hirshfeld_volume_ratios',

1113 'hirshfeld_charges',

1114 'hirshfeld_magmoms',

1115 ]

1116 for k in keys:

1117 if k not in self.results:

1118 continue

1119 self.results[k] = self.results[k][indices]

1120

1121 self.atoms.set_scaled_positions(positions_frac_atoms)

1122

1123 else:

1124 # If no atoms, object has been previously defined

1125 # we define it here and set the Castep() instance as calculator.

1126 # This covers the case that we simply want to open a .castep file.

1127

1128 # The next time around we will have an atoms object, since

1129 # set_calculator also set atoms in the calculator.

1130 if self.atoms:

1131 constraints = self.atoms.constraints

1132 atoms = Atoms(

1133 species,

1134 cell=lattice_real,

1135 constraint=constraints,

1136 pbc=True,

1137 scaled_positions=positions_frac,

1138 )

1139 if custom_species is not None:

1140 atoms.new_array('castep_custom_species',

1141 np.array(custom_species))

1142

1143 atoms.set_initial_charges(self.results.get('charges'))

1144 atoms.set_initial_magnetic_moments(self.results.get('magmoms'))

1145

1146 atoms.calc = self

1147

1148 self._kpoints = kpoints

1149

1150 if self._warnings:

1151 warnings.warn(f'WARNING: {castep_file} contains warnings')

1152 for warning in self._warnings:

1153 warnings.warn(warning)

1154 # reset

1155 self._warnings = []

1156

1157 # Read in eigenvalues from bands file

1158 bands_file = castep_file[:-7] + '.bands'

1159 if (self.param.task.value is not None

1160 and self.param.task.value.lower() == 'bandstructure'):

1161 self._band_structure = self.band_structure(bandfile=bands_file)

1162 else:

1163 try:

1164 (self._ibz_kpts,

1165 self._ibz_weights,

1166 self._eigenvalues,

1167 self._efermi) = read_bands(filename=bands_file)

1168 except FileNotFoundError:

1169 warnings.warn('Could not load .bands file, eigenvalues and '

1170 'Fermi energy are unknown')

1171

1172 # TODO: deprecate once inheriting BaseCalculator

1173 def get_hirsh_volrat(self):

1174 """

1175 Return the Hirshfeld volume ratios.

1176 """

1177 return self.results.get('hirshfeld_volume_ratios')

1178

1179 # TODO: deprecate once inheriting BaseCalculator

1180 def get_spins(self):

1181 """

1182 Return the spins from a plane-wave Mulliken analysis.

1183 """

1184 return self.results['magmoms']

1185

1186 # TODO: deprecate once inheriting BaseCalculator

1187 def get_mulliken_charges(self):

1188 """

1189 Return the charges from a plane-wave Mulliken analysis.

1190 """

1191 return self.results['charges']

1192

1193 # TODO: deprecate once inheriting BaseCalculator

1194 def get_hirshfeld_charges(self):

1195 """

1196 Return the charges from a Hirshfeld analysis.

1197 """

1198 return self.results.get('hirshfeld_charges')

1199

1200 def get_total_time(self):

1201 """

1202 Return the total runtime

1203 """

1204 return self._total_time

1205

1206 def get_peak_memory(self):

1207 """

1208 Return the peak memory usage

1209 """

1210 return self._peak_memory

1211

1212 def set_label(self, label):

1213 """The label is part of each seed, which in turn is a prefix

1214 in each CASTEP related file.

1215 """

1216 # we may think about changing this in future to set `self._directory`

1217 # and `self._label`, as one would expect

1218 self._label = label

1219

1220 def set_pspot(self, pspot, elems=None,

1221 notelems=None,

1222 clear=True,

1223 suffix='usp'):

1224 """Quickly set all pseudo-potentials: Usually CASTEP psp are named

1225 like <Elem>_<pspot>.<suffix> so this function function only expects

1226 the <LibraryName>. It then clears any previous pseudopotential

1227 settings apply the one with <LibraryName> for each element in the

1228 atoms object. The optional elems and notelems arguments can be used

1229 to exclusively assign to some species, or to exclude with notelemens.

1230

1231 Parameters ::

1232

1233 - elems (None) : set only these elements

1234 - notelems (None): do not set the elements

1235 - clear (True): clear previous settings

1236 - suffix (usp): PP file suffix

1237 """

1238 if self._find_pspots:

1239 if self._pedantic:

1240 warnings.warn('Warning: <_find_pspots> = True. '

1241 'Do you really want to use `set_pspots()`? '

1242 'This does not check whether the PP files exist. '

1243 'You may rather want to use `find_pspots()` with '

1244 'the same <pspot>.')

1245

1246 if clear and not elems and not notelems:

1247 self.cell.species_pot.clear()

1248 for elem in set(self.atoms.get_chemical_symbols()):

1249 if elems is not None and elem not in elems:

1250 continue

1251 if notelems is not None and elem in notelems:

1252 continue

1253 self.cell.species_pot = (elem, f'{elem}_{pspot}.{suffix}')

1254

1255 def find_pspots(self, pspot='.+', elems=None,

1256 notelems=None, clear=True, suffix='(usp|UPF|recpot)'):

1257 r"""Quickly find and set all pseudo-potentials by searching in

1258 castep_pp_path:

1259

1260 This one is more flexible than set_pspots, and also checks if the files

1261 are actually available from the castep_pp_path.

1262

1263 Essentially, the function parses the filenames in <castep_pp_path> and

1264 does a regex matching. The respective pattern is:

1265

1266 r"^(<elem>|<elem.upper()>|elem.lower()>(_|-)<pspot>\.<suffix>$"

1267

1268 In most cases, it will be sufficient to not specify anything, if you

1269 use standard CASTEP USPPs with only one file per element in the

1270 <castep_pp_path>.

1271

1272 The function raises a `RuntimeError` if there is some ambiguity

1273 (multiple files per element).

1274

1275 Parameters ::

1276

1277 - pspots ('.+') : as defined above, will be a wildcard if not

1278 specified.

1279 - elems (None) : set only these elements

1280 - notelems (None): do not set the elements

1281 - clear (True): clear previous settings

1282 - suffix (usp|UPF|recpot): PP file suffix

1283 """

1284 if clear and not elems and not notelems:

1285 self.cell.species_pot.clear()

1286

1287 if not os.path.isdir(self._castep_pp_path):

1288 if self._pedantic:

1289 warnings.warn(

1290 'Cannot search directory: {} Folder does not exist'

1291 .format(self._castep_pp_path))

1292 return

1293

1294 # translate the bash wildcard syntax to regex

1295 if pspot == '*':

1296 pspot = '.*'

1297 if suffix == '*':

1298 suffix = '.*'

1299 if pspot == '*':

1300 pspot = '.*'

1301

1302 # GBRV USPPs have a strnage naming schme

1303 pattern = r'^({elem}|{elem_upper}|{elem_lower})(_|-){pspot}\.{suffix}$'

1304

1305 for elem in set(self.atoms.get_chemical_symbols()):

1306 if elems is not None and elem not in elems:

1307 continue

1308 if notelems is not None and elem in notelems:

1309 continue

1310 p = pattern.format(elem=elem,

1311 elem_upper=elem.upper(),

1312 elem_lower=elem.lower(),

1313 pspot=pspot,

1314 suffix=suffix)

1315 pps = []

1316 for f in os.listdir(self._castep_pp_path):

1317 if re.match(p, f):

1318 pps.append(f)

1319 if not pps:

1320 if self._pedantic:

1321 warnings.warn('Pseudopotential for species {} not found!'

1322 .format(elem))

1323 elif len(pps) != 1:

1324 raise RuntimeError(

1325 'Pseudopotential for species ''{} not unique!\n'

1326 .format(elem)

1327 + 'Found the following files in {}\n'

1328 .format(self._castep_pp_path)

1329 + '\n'.join([f' {pp}' for pp in pps]) +

1330 '\nConsider a stricter search pattern in `find_pspots()`.')

1331 else:

1332 self.cell.species_pot = (elem, pps[0])

1333

1334 @_self_getter

1335 def get_total_energy(self, atoms):

1336 """Run CASTEP calculation if needed and return total energy."""

1337 self.update(atoms)

1338 return self.results.get('energy_without_dispersion_correction')

1339

1340 @_self_getter

1341 def get_total_energy_corrected(self, atoms):

1342 """Run CASTEP calculation if needed and return total energy."""

1343 self.update(atoms)

1344 return self.results.get('energy_with_finite_basis_set_correction')

1345

1346 @_self_getter

1347 def get_free_energy(self, atoms):

1348 """Run CASTEP calculation if needed and return free energy.

1349 Only defined with smearing."""

1350 self.update(atoms)

1351 return self.results.get('free_energy_without_dispersion_correction')

1352

1353 @_self_getter

1354 def get_0K_energy(self, atoms):

1355 """Run CASTEP calculation if needed and return 0K energy.

1356 Only defined with smearing."""

1357 self.update(atoms)

1358 return self.results.get('energy_zero_without_dispersion_correction')

1359

1360 @_self_getter

1361 def get_pressure(self, atoms):

1362 """Return the pressure."""

1363 self.update(atoms)

1364 return self.results.get('pressure')

1365

1366 @_self_getter

1367 def get_unit_cell(self, atoms):

1368 """Return the unit cell."""

1369 self.update(atoms)

1370 return self._unit_cell

1371

1372 @_self_getter

1373 def get_kpoints(self, atoms):

1374 """Return the kpoints."""

1375 self.update(atoms)

1376 return self._kpoints

1377

1378 @_self_getter

1379 def get_number_cell_constraints(self, atoms):

1380 """Return the number of cell constraints."""

1381 self.update(atoms)

1382 return self._number_of_cell_constraints

1383

1384 def update(self, atoms):

1385 """Checks if atoms object or calculator changed and

1386 runs calculation if so.

1387 """

1388 if self.calculation_required(atoms, None):

1389 self.calculate(atoms, [], [])

1390

1391 def calculation_required(self, atoms, properties):

1392 """Checks wether anything changed in the atoms object or CASTEP

1393 settings since the last calculation using this instance.

1394 """

1395 # SPR: what happens with the atoms parameter here? Why don't we use it?

1396 # from all that I can tell we need to compare against atoms instead of

1397 # self.atoms

1398 # if not self.atoms == self._old_atoms:

1399 if atoms != self._old_atoms:

1400 return True

1401 if self._old_param is None or self._old_cell is None:

1402 return True

1403 if self.param._options != self._old_param._options:

1404 return True

1405 if self.cell._options != self._old_cell._options:

1406 return True

1407 return False

1408

1409 def calculate(self, atoms, properties, system_changes):

1410 """Write all necessary input file and call CASTEP."""

1411 self.prepare_input_files(atoms, force_write=self._force_write)

1412 if not self._prepare_input_only:

1413 self.run()

1414 self.read()

1415

1416 # we need to push the old state here!

1417 # although run() pushes it, read() may change the atoms object

1418 # again.

1419 # yet, the old state is supposed to be the one AFTER read()

1420 self.push_oldstate()

1421

1422 def push_oldstate(self):

1423 """This function pushes the current state of the (CASTEP) Atoms object

1424 onto the previous state. Or in other words after calling this function,

1425 calculation_required will return False and enquiry functions just

1426 report the current value, e.g. get_forces(), get_potential_energy().

1427 """

1428 # make a snapshot of all current input

1429 # to be able to test if recalculation

1430 # is necessary

1431 self._old_atoms = self.atoms.copy()

1432 self._old_param = deepcopy(self.param)

1433 self._old_cell = deepcopy(self.cell)

1434

1435 def initialize(self, *args, **kwargs):

1436 """Just an alias for prepar_input_files to comply with standard

1437 function names in ASE.

1438 """

1439 self.prepare_input_files(*args, **kwargs)

1440

1441 def prepare_input_files(self, atoms=None, force_write=None):

1442 """Only writes the input .cell and .param files and return

1443 This can be useful if one quickly needs to prepare input files

1444 for a cluster where no python or ASE is available. One can than

1445 upload the file manually and read out the results using

1446 Castep().read().

1447 """

1448

1449 if self.param.reuse.value is None:

1450 if self._pedantic:

1451 warnings.warn(

1452 'You have not set e.g. calc.param.reuse = True. '

1453 'Reusing a previous calculation may save CPU time! '

1454 'The interface will make sure by default, .check exists. '

1455 'file before adding this statement to the .param file.')

1456 if self.param.num_dump_cycles.value is None:

1457 if self._pedantic:

1458 warnings.warn(

1459 'You have not set e.g. calc.param.num_dump_cycles = 0. '

1460 'This can save you a lot of disk space. One only needs '

1461 '*wvfn* if electronic convergence is not achieved.')

1462 from ase.io.castep import write_param

1463

1464 if atoms is None:

1465 atoms = self.atoms

1466 else:

1467 self.atoms = atoms

1468

1469 if force_write is None:

1470 force_write = self._force_write

1471

1472 # if we have new instance of the calculator,

1473 # move existing results out of the way, first

1474 if (os.path.isdir(self._directory)

1475 and self._calls == 0

1476 and self._rename_existing_dir):

1477 if os.listdir(self._directory) == []:

1478 os.rmdir(self._directory)

1479 else:

1480 # rename appending creation date of the directory

1481 ctime = time.localtime(os.lstat(self._directory).st_ctime)

1482 os.rename(self._directory, '%s.bak-%s' %

1483 (self._directory,

1484 time.strftime('%Y%m%d-%H%M%S', ctime)))

1485

1486 # create work directory

1487 if not os.path.isdir(self._directory):

1488 os.makedirs(self._directory, 0o775)

1489

1490 # we do this every time, not only upon first call

1491 # if self._calls == 0:

1492 self._fetch_pspots()

1493

1494 # if _try_reuse is requested and this

1495 # is not the first run, we try to find

1496 # the .check file from the previous run

1497 # this is only necessary if _track_output

1498 # is set to true

1499 if self._try_reuse and self._calls > 0:

1500 if os.path.exists(self._abs_path(self._check_file)):

1501 self.param.reuse = self._check_file

1502 elif os.path.exists(self._abs_path(self._castep_bin_file)):

1503 self.param.reuse = self._castep_bin_file

1504 self._seed = self._build_castep_seed()

1505 self._check_file = f'{self._seed}.check'

1506 self._castep_bin_file = f'{self._seed}.castep_bin'

1507 self._castep_file = self._abs_path(f'{self._seed}.castep')

1508

1509 # write out the input file

1510 self._write_cell(self._abs_path(f'{self._seed}.cell'),

1511 self.atoms, castep_cell=self.cell,

1512 force_write=force_write)

1513

1514 if self._export_settings:

1515 interface_options = self._opt

1516 else:

1517 interface_options = None

1518 write_param(self._abs_path(f'{self._seed}.param'), self.param,

1519 check_checkfile=self._check_checkfile,

1520 force_write=force_write,

1521 interface_options=interface_options,)

1522

1523 def _build_castep_seed(self):

1524 """Abstracts to construction of the final castep <seed>

1525 with and without _tracking_output.

1526 """

1527 if self._track_output:

1528 return '%s-%06d' % (self._label, self._calls)

1529 else:

1530 return f'{(self._label)}'

1531

1532 def _abs_path(self, path):

1533 # Create an absolute path for a file to put in the working directory

1534 return os.path.join(self._directory, path)

1535

1536 def run(self):

1537 """Simply call castep. If the first .err file

1538 contains text, this will be printed to the screen.

1539 """

1540 # change to target directory

1541 self._calls += 1

1542

1543 # run castep itself

1544 stdout, stderr = shell_stdouterr('{} {}'.format(self._castep_command,

1545 self._seed),

1546 cwd=self._directory)

1547 if stdout:

1548 print(f'castep call stdout:\n{stdout}')

1549 if stderr:

1550 print(f'castep call stderr:\n{stderr}')

1551

1552 # shouldn't it be called after read()???

1553 # self.push_oldstate()

1554

1555 # check for non-empty error files

1556 err_file = self._abs_path(f'{self._seed}.0001.err')

1557 if os.path.exists(err_file):

1558 with open(err_file) as err_file:

1559 self._error = err_file.read()

1560 if self._error:

1561 raise RuntimeError(self._error)

1562

1563 def __repr__(self):

1564 """Returns generic, fast to capture representation of

1565 CASTEP settings along with atoms object.

1566 """

1567 expr = ''

1568 expr += '-----------------Atoms--------------------\n'

1569 if self.atoms is not None:

1570 expr += str('%20s\n' % self.atoms)

1571 else:

1572 expr += 'None\n'

1573

1574 expr += '-----------------Param keywords-----------\n'

1575 expr += str(self.param)

1576 expr += '-----------------Cell keywords------------\n'

1577 expr += str(self.cell)

1578 expr += '-----------------Internal keys------------\n'

1579 for key in self.internal_keys:

1580 expr += '%20s : %s\n' % (key, self._opt[key])

1581

1582 return expr

1583

1584 def __getattr__(self, attr):

1585 """___getattr___ gets overloaded to reroute the internal keys

1586 and to be able to easily store them in in the param so that

1587 they can be read in again in subsequent calls.

1588 """

1589 if attr in self.internal_keys:

1590 return self._opt[attr]

1591 if attr in ['__repr__', '__str__']:

1592 raise AttributeError

1593 elif attr not in self.__dict__:

1594 raise AttributeError(f'Attribute {attr} not found')

1595 else:

1596 return self.__dict__[attr]

1597

1598 def __setattr__(self, attr, value):

1599 """We overload the settattr method to make value assignment

1600 as pythonic as possible. Internal values all start with _.

1601 Value assigment is case insensitive!

1602 """

1603

1604 if attr.startswith('_'):

1605 # internal variables all start with _

1606 # let's check first if they are close but not identical

1607 # to one of the switches, that the user accesses directly

1608 similars = difflib.get_close_matches(attr, self.internal_keys,

1609 cutoff=0.9)

1610 if attr not in self.internal_keys and similars:

1611 warnings.warn(

1612 'Warning: You probably tried one of: '

1613 f'{similars} but typed {attr}')

1614 if attr in self.internal_keys:

1615 self._opt[attr] = value

1616 if attr == '_track_output':

1617 if value:

1618 self._try_reuse = True

1619 if self._pedantic:

1620 warnings.warn(

1621 'You switched _track_output on. This will '

1622 'consume a lot of disk-space. The interface '

1623 'also switched _try_reuse on, which will '

1624 'try to find the last check file. Set '

1625 '_try_reuse = False, if you need '

1626 'really separate calculations')

1627 elif '_try_reuse' in self._opt and self._try_reuse:

1628 self._try_reuse = False

1629 if self._pedantic:

1630 warnings.warn('_try_reuse is set to False, too')

1631 else:

1632 self.__dict__[attr] = value

1633 return

1634 elif attr in ['atoms', 'cell', 'param', 'results']:

1635 if value is not None:

1636 if attr == 'atoms' and not isinstance(value, Atoms):

1637 raise TypeError(

1638 f'{value} is not an instance of Atoms.')

1639 elif attr == 'cell' and not isinstance(value, CastepCell):

1640 raise TypeError(

1641 f'{value} is not an instance of CastepCell.')

1642 elif attr == 'param' and not isinstance(value, CastepParam):

1643 raise TypeError(

1644 f'{value} is not an instance of CastepParam.')

1645 # These 3 are accepted right-away, no matter what

1646 self.__dict__[attr] = value

1647 return

1648 elif attr in self.atoms_obj_keys:

1649 # keywords which clearly belong to the atoms object are

1650 # rerouted to go there

1651 self.atoms.__dict__[attr] = value

1652 return

1653 elif attr in self.atoms_keys:

1654 # CASTEP keywords that should go into the atoms object

1655 # itself are blocked

1656 warnings.warn('Ignoring setings of "%s", since this has to be set '

1657 'through the atoms object' % attr)

1658 return

1659

1660 attr = attr.lower()

1661 if attr not in (list(self.cell._options.keys())

1662 + list(self.param._options.keys())):

1663 # what is left now should be meant to be a castep keyword

1664 # so we first check if it defined, and if not offer some error

1665 # correction

1666 if self._kw_tol == 0:

1667 similars = difflib.get_close_matches(

1668 attr,

1669 self.cell._options.keys() + self.param._options.keys())

1670 if similars:

1671 raise RuntimeError(

1672 f'Option "{attr}" not known! You mean "{similars[0]}"?')

1673 else:

1674 raise RuntimeError(f'Option "{attr}" is not known!')

1675 else:

1676 warnings.warn('Option "%s" is not known - please set any new'

1677 ' options directly in the .cell or .param '

1678 'objects' % attr)

1679 return

1680

1681 # here we know it must go into one of the component param or cell

1682 # so we first determine which one

1683 if attr in self.param._options.keys():

1684 comp = 'param'

1685 elif attr in self.cell._options.keys():

1686 comp = 'cell'

1687 else:

1688 raise RuntimeError('Programming error: could not attach '

1689 'the keyword to an input file')

1690

1691 self.__dict__[comp].__setattr__(attr, value)

1692

1693 def merge_param(self, param, overwrite=True, ignore_internal_keys=False):

1694 """Parse a param file and merge it into the current parameters."""

1695 if isinstance(param, CastepParam):

1696 for key, option in param._options.items():

1697 if option.value is not None:

1698 self.param.__setattr__(key, option.value)

1699 return

1700

1701 elif isinstance(param, str):

1702 param_file = open(param)

1703 _close = True

1704

1705 else:

1706 # in this case we assume that we have a fileobj already, but check

1707 # for attributes in order to avoid extended EAFP blocks.

1708 param_file = param

1709

1710 # look before you leap...

1711 attributes = ['name',

1712 'close'

1713 'readlines']

1714

1715 for attr in attributes:

1716 if not hasattr(param_file, attr):

1717 raise TypeError('"param" is neither CastepParam nor str '

1718 'nor valid fileobj')

1719

1720 param = param_file.name

1721 _close = False

1722

1723 self, int_opts = read_param(fd=param_file, calc=self,

1724 get_interface_options=True)

1725

1726 # Add the interface options

1727 for k, val in int_opts.items():

1728 if (k in self.internal_keys and not ignore_internal_keys):

1729 if val in _tf_table:

1730 val = _tf_table[val]

1731 self._opt[k] = val

1732

1733 if _close:

1734 param_file.close()

1735

1736 def dryrun_ok(self, dryrun_flag='-dryrun'):

1737 """Starts a CASTEP run with the -dryrun flag [default]

1738 in a temporary and check wether all variables are initialized

1739 correctly. This is recommended for every bigger simulation.

1740 """

1741 from ase.io.castep import write_param

1742

1743 temp_dir = tempfile.mkdtemp()

1744 self._fetch_pspots(temp_dir)

1745 seed = 'dryrun'

1746

1747 self._write_cell(os.path.join(temp_dir, f'{seed}.cell'),

1748 self.atoms, castep_cell=self.cell)

1749 # This part needs to be modified now that we rely on the new formats.py

1750 # interface

1751 if not os.path.isfile(os.path.join(temp_dir, f'{seed}.cell')):

1752 warnings.warn(f'{seed}.cell not written - aborting dryrun')

1753 return None

1754 write_param(os.path.join(temp_dir, f'{seed}.param'), self.param, )

1755

1756 stdout, stderr = shell_stdouterr(('{} {} {}'.format(

1757 self._castep_command,

1758 seed,

1759 dryrun_flag)),

1760 cwd=temp_dir)

1761

1762 if stdout:

1763 print(stdout)

1764 if stderr:

1765 print(stderr)

1766 with open(os.path.join(temp_dir, f'{seed}.castep')) as result_file:

1767 txt = result_file.read()

1768 ok_string = (r'.*DRYRUN finished.*No problems found with input '

1769 r'files.*')

1770 match = re.match(ok_string, txt, re.DOTALL)

1771

1772 m = re.search(r'Number of kpoints used =\s*([0-9]+)', txt)

1773 if m:

1774 self._kpoints = int(m.group(1))

1775 else:

1776 warnings.warn(

1777 'Couldn\'t fetch number of kpoints from dryrun CASTEP file')

1778

1779 err_file = os.path.join(temp_dir, f'{seed}.0001.err')

1780 if match is None and os.path.exists(err_file):

1781 with open(err_file) as err_file:

1782 self._error = err_file.read()

1783 shutil.rmtree(temp_dir)

1784

1785 # re.match return None is the string does not match

1786 return match is not None

1787

1788 def _fetch_pspots(self, directory=None):

1789 """Put all specified pseudo-potentials into the working directory.

1790 """

1791 # should be a '==' right? Otherwise setting _castep_pp_path is not

1792 # honored.

1793 if (not cfg.get('PSPOT_DIR', None)

1794 and self._castep_pp_path == os.path.abspath('.')):

1795 # By default CASTEP consults the environment variable

1796 # PSPOT_DIR. If this contains a list of colon separated

1797 # directories it will check those directories for pseudo-

1798 # potential files if not in the current directory.

1799 # Thus if PSPOT_DIR is set there is nothing left to do.

1800 # If however PSPOT_DIR was been accidentally set

1801 # (e.g. with regards to a different program)

1802 # setting CASTEP_PP_PATH to an explicit value will

1803 # still be honored.

1804 return

1805

1806 if directory is None:

1807 directory = self._directory

1808 if not os.path.isdir(self._castep_pp_path):

1809 warnings.warn(f'PSPs directory {self._castep_pp_path} not found')

1810 pspots = {}

1811 if self._find_pspots:

1812 self.find_pspots()

1813 if self.cell.species_pot.value is not None:

1814 for line in self.cell.species_pot.value.split('\n'):

1815 line = line.split()

1816 if line:

1817 pspots[line[0]] = line[1]

1818 for species in self.atoms.get_chemical_symbols():

1819 if not pspots or species not in pspots.keys():

1820 if self._build_missing_pspots:

1821 if self._pedantic:

1822 warnings.warn(

1823 'Warning: you have no PP specified for %s. '

1824 'CASTEP will now generate an on-the-fly '

1825 'potentials. '

1826 'For sake of numerical consistency and efficiency '

1827 'this is discouraged.' % species)

1828 else:

1829 raise RuntimeError(

1830 f'Warning: you have no PP specified for {species}.')

1831 if self.cell.species_pot.value:

1832 for (species, pspot) in pspots.items():

1833 orig_pspot_file = os.path.join(self._castep_pp_path, pspot)

1834 cp_pspot_file = os.path.join(directory, pspot)

1835 if (os.path.exists(orig_pspot_file)

1836 and not os.path.exists(cp_pspot_file)):

1837 if self._copy_pspots:

1838 shutil.copy(orig_pspot_file, directory)

1839 elif self._link_pspots:

1840 os.symlink(orig_pspot_file, cp_pspot_file)

1841 else:

1842 if self._pedantic:

1843 warnings.warn(ppwarning)

1844

1845

1846ppwarning = ('Warning: PP files have neither been '

1847 'linked nor copied to the working directory. Make '

1848 'sure to set the evironment variable PSPOT_DIR '

1849 'accordingly!')

1850

1851

1852def _read_header(out: io.TextIOBase):

1853 """Read the header blocks from a .castep file.

1854

1855 Returns

1856 -------

1857 parameters : dict

1858 Dictionary storing keys and values of a .param file.

1859 """

1860 def _parse_on_off(_: str):

1861 return {'on': True, 'off': False}[_]

1862

1863 read_title = False

1864 parameters: Dict[str, Any] = {}

1865 while True:

1866 line = out.readline()

1867 if len(line) == 0: # end of file

1868 break

1869 if re.search(r'^\s*\*+$', line) and read_title: # end of header

1870 break

1871

1872 if re.search(r'\**.* Title \**', line):

1873 read_title = True

1874

1875 # General Parameters

1876

1877 elif 'output verbosity' in line:

1878 parameters['iprint'] = int(line.split()[-1][1])

1879 elif re.match(r'\stype of calculation\s*:', line):

1880 parameters['task'] = {

1881 'single point energy': 'SinglePoint',

1882 'geometry optimization': 'GeometryOptimization',

1883 'band structure': 'BandStructure',

1884 'molecular dynamics': 'MolecularDynamics',

1885 'optical properties': 'Optics',

1886 'phonon calculation': 'Phonon',

1887 'E-field calculation': 'Efield',

1888 'Phonon followed by E-field': 'Phonon+Efield',

1889 'transition state search': 'TransitionStateSearch',

1890 'Magnetic Resonance': 'MagRes',

1891 'Core level spectra': 'Elnes',

1892 'Electronic Spectroscopy': 'ElectronicSpectroscopy',

1893 }[line.split(':')[-1].strip()]

1894 elif 'stress calculation' in line:

1895 parameters['calculate_stress'] = _parse_on_off(line.split()[-1])

1896 elif 'calculation limited to maximum' in line:

1897 parameters['run_time'] = float(line.split()[-2])

1898 elif re.match(r'\soptimization strategy\s*:', line):

1899 parameters['opt_strategy'] = {

1900 'maximize speed(+++)': 'Speed',

1901 'minimize memory(---)': 'Memory',

1902 'balance speed and memory': 'Default',

1903 }[line.split(':')[-1].strip()]

1904

1905 # Exchange-Correlation Parameters

1906

1907 elif re.match(r'\susing functional\s*:', line):

1908 parameters['xc_functional'] = {

1909 'Local Density Approximation': 'LDA',

1910 'Perdew Wang (1991)': 'PW91',

1911 'Perdew Burke Ernzerhof': 'PBE',

1912 'revised Perdew Burke Ernzerhof': 'RPBE',

1913 'PBE with Wu-Cohen exchange': 'WC',

1914 'PBE for solids (2008)': 'PBESOL',

1915 'Hartree-Fock': 'HF',

1916 'Hartree-Fock +': 'HF-LDA',

1917 'Screened Hartree-Fock': 'sX',

1918 'Screened Hartree-Fock + ': 'sX-LDA',

1919 'hybrid PBE0': 'PBE0',

1920 'hybrid B3LYP': 'B3LYP',

1921 'hybrid HSE03': 'HSE03',

1922 'hybrid HSE06': 'HSE06',

1923 'RSCAN': 'RSCAN',

1924 }[line.split(':')[-1].strip()]

1925 elif 'DFT+D: Semi-empirical dispersion correction' in line:

1926 parameters['sedc_apply'] = _parse_on_off(line.split()[-1])

1927 elif 'SEDC with' in line:

1928 parameters['sedc_scheme'] = {

1929 'OBS correction scheme': 'OBS',

1930 'G06 correction scheme': 'G06',

1931 'D3 correction scheme': 'D3',

1932 'D3(BJ) correction scheme': 'D3-BJ',

1933 'D4 correction scheme': 'D4',

1934 'JCHS correction scheme': 'JCHS',

1935 'TS correction scheme': 'TS',

1936 'TSsurf correction scheme': 'TSSURF',

1937 'TS+SCS correction scheme': 'TSSCS',

1938 'aperiodic TS+SCS correction scheme': 'ATSSCS',

1939 'aperiodic MBD@SCS method': 'AMBD',

1940 'MBD@SCS method': 'MBD',

1941 'aperiodic MBD@rsSCS method': 'AMBD*',

1942 'MBD@rsSCS method': 'MBD*',

1943 'XDM correction scheme': 'XDM',

1944 }[line.split(':')[-1].strip()]

1945

1946 # Basis Set Parameters

1947

1948 elif 'basis set accuracy' in line:

1949 parameters['basis_precision'] = line.split()[-1]

1950 elif 'plane wave basis set cut-off' in line:

1951 parameters['cut_off_energy'] = float(line.split()[-2])

1952 elif re.match(r'\sfinite basis set correction\s*:', line):

1953 parameters['finite_basis_corr'] = {

1954 'none': 0,

1955 'manual': 1,

1956 'automatic': 2,

1957 }[line.split()[-1]]

1958

1959 # Electronic Parameters

1960

1961 elif 'treating system as spin-polarized' in line:

1962 parameters['spin_polarized'] = True

1963

1964 # Electronic Minimization Parameters

1965

1966 elif 'Treating system as non-metallic' in line:

1967 parameters['fix_occupancy'] = True

1968 elif 'total energy / atom convergence tol.' in line:

1969 parameters['elec_energy_tol'] = float(line.split()[-2])

1970 elif 'convergence tolerance window' in line:

1971 parameters['elec_convergence_win'] = int(line.split()[-2])

1972 elif 'max. number of SCF cycles:' in line:

1973 parameters['max_scf_cycles'] = float(line.split()[-1])

1974 elif 'dump wavefunctions every' in line:

1975 parameters['num_dump_cycles'] = float(line.split()[-3])

1976

1977 # Density Mixing Parameters

1978

1979 elif 'density-mixing scheme' in line:

1980 parameters['mixing_scheme'] = line.split()[-1]

1981

1982 return parameters

1983

1984

1985def _read_unit_cell(out: io.TextIOBase):

1986 """Read a Unit Cell block from a .castep file."""

1987 while True:

1988 line = out.readline()

1989 fields = line.split()

1990 if len(fields) == 6:

1991 break

1992 lattice_real = []

1993 for _ in range(3):

1994 lattice_real.append([float(f) for f in fields[0:3]])

1995 line = out.readline()

1996 fields = line.split()

1997 return np.array(lattice_real)

1998

1999

2000def _read_forces(out: io.TextIOBase, n_atoms: int):

2001 """Read a block for atomic forces from a .castep file."""

2002 constraints: List[FixConstraint] = []

2003 forces = []

2004 while True:

2005 line = out.readline()

2006 fields = line.split()

2007 if len(fields) == 7:

2008 break

2009 for n in range(n_atoms):

2010 consd = np.array([0, 0, 0])

2011 fxyz = [0.0, 0.0, 0.0]

2012 for i, force_component in enumerate(fields[-4:-1]):

2013 if force_component.count("(cons'd)") > 0:

2014 consd[i] = 1

2015 # remove constraint labels in force components

2016 fxyz[i] = float(force_component.replace("(cons'd)", ''))

2017 if consd.all():

2018 constraints.append(FixAtoms(n))

2019 elif consd.any():

2020 constraints.append(FixCartesian(n, consd))

2021 forces.append(fxyz)

2022 line = out.readline()

2023 fields = line.split()

2024 return forces, constraints

2025

2026

2027def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):

2028 """Read fractional coordinates from a .castep file."""

2029 species: List[str] = []

2030 custom_species: Optional[List[str]] = None # A CASTEP special thing

2031 positions_frac: List[List[float]] = []

2032 while True:

2033 line = out.readline()

2034 fields = line.split()

2035 if len(fields) == 7:

2036 break

2037 for _ in range(n_atoms):

2038 spec_custom = fields[1].split(':', 1)

2039 elem = spec_custom[0]

2040 if len(spec_custom) > 1 and custom_species is None:

2041 # Add it to the custom info!

2042 custom_species = list(species)

2043 species.append(elem)

2044 if custom_species is not None:

2045 custom_species.append(fields[1])

2046 positions_frac.append([float(s) for s in fields[3:6]])

2047 line = out.readline()

2048 fields = line.split()

2049 return species, custom_species, positions_frac

2050

2051

2052def _read_stress(out: io.TextIOBase):

2053 """Read a block for the stress tensor from a .castep file."""

2054 while True:

2055 line = out.readline()

2056 fields = line.split()

2057 if len(fields) == 6:

2058 break

2059 results = {}

2060 stress = []

2061 for _ in range(3):

2062 stress.append([float(s) for s in fields[2:5]])

2063 line = out.readline()

2064 fields = line.split()

2065 # stress in .castep file is given in GPa

2066 results['stress'] = np.array(stress) * units.GPa

2067 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]

2068 line = out.readline()

2069 if "Pressure:" in line:

2070 results['pressure'] = float(line.split()[-2]) * units.GPa

2071 return results

2072

2073

2074def _read_mulliken_charges(out: io.TextIOBase):

2075 """Read a block for Mulliken charges from a .castep file."""

2076 for i in range(3):

2077 line = out.readline()

2078 if i == 1:

2079 spin_polarized = 'Spin' in line

2080 results = defaultdict(list)

2081 while True:

2082 line = out.readline()

2083 fields = line.split()

2084 if len(fields) == 1:

2085 break

2086 if spin_polarized:

2087 if len(fields) != 7: # due to CASTEP 18 outformat changes

2088 results['charges'].append(float(fields[-2]))

2089 results['magmoms'].append(float(fields[-1]))

2090 else:

2091 results['charges'].append(float(fields[-1]))

2092 return {k: np.array(v) for k, v in results.items()}

2093

2094

2095def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):

2096 """Read the Hirshfeld analysis when iprint > 1 from a .castep file."""

2097 results = defaultdict(list)

2098 for _ in range(n_atoms):

2099 while True:

2100 line = out.readline()

2101 if line.strip() == '':

2102 break # end for each atom

2103 if 'Hirshfeld / free atomic volume :' in line:

2104 line = out.readline()

2105 fields = line.split()

2106 results['hirshfeld_volume_ratios'].append(float(fields[0]))

2107 return {k: np.array(v) for k, v in results.items()}

2108

2109

2110def _read_hirshfeld_charges(out: io.TextIOBase):

2111 """Read a block for Hirshfeld charges from a .castep file."""

2112 for i in range(3):

2113 line = out.readline()

2114 if i == 1:

2115 spin_polarized = 'Spin' in line

2116 results = defaultdict(list)

2117 while True:

2118 line = out.readline()

2119 fields = line.split()

2120 if len(fields) == 1:

2121 break

2122 if spin_polarized:

2123 results['hirshfeld_charges'].append(float(fields[-2]))

2124 results['hirshfeld_magmoms'].append(float(fields[-1]))

2125 else:

2126 results['hirshfeld_charges'].append(float(fields[-1]))

2127 return {k: np.array(v) for k, v in results.items()}

2128

2129

2130def _get_indices_to_sort_back(symbols, species):

2131 """Get indices to sort spicies in .castep back to atoms.symbols."""

2132 uniques = np.unique(symbols)

2133 indices = np.full(len(symbols), -1, dtype=int)

2134 for unique in uniques:

2135 where_symbols = [i for i, s in enumerate(symbols) if s == unique]

2136 where_species = [j for j, s in enumerate(species) if s == unique]

2137 for i, j in zip(where_symbols, where_species):

2138 indices[i] = j

2139 if -1 in indices:

2140 not_assigned = [_ for _ in indices if _ == -1]

2141 raise RuntimeError(f'Atoms {not_assigned} where not assigned.')

2142 return indices

2143

2144

2145def _set_energy_and_free_energy(results: Dict[str, Any]):

2146 """Set values referred to as `energy` and `free_energy`."""

2147 if 'energy_with_dispersion_correction' in results:

2148 suffix = '_with_dispersion_correction'

2149 else:

2150 suffix = '_without_dispersion_correction'

2151

2152 if 'free_energy' + suffix in results: # metallic

2153 keye = 'energy_zero' + suffix

2154 keyf = 'free_energy' + suffix

2155 else: # non-metallic

2156 # The finite basis set correction is applied to the energy at finite T

2157 # (not the energy at 0 K). We should hence refer to the corrected value

2158 # as `energy` only when the free energy is unavailable, i.e., only when

2159 # FIX_OCCUPANCY : TRUE and thus no smearing is applied.

2160 if 'energy_with_finite_basis_set_correction' in results:

2161 keye = 'energy_with_finite_basis_set_correction'

2162 else:

2163 keye = 'energy' + suffix

2164 keyf = 'energy' + suffix

2165

2166 results['energy'] = results[keye]

2167 results['free_energy'] = results[keyf]

2168

2169

2170def get_castep_version(castep_command):

2171 """This returns the version number as printed in the CASTEP banner.

2172 For newer CASTEP versions ( > 6.1) the --version command line option

2173 has been added; this will be attempted first.

2174 """

2175 import tempfile

2176 with tempfile.TemporaryDirectory() as temp_dir:

2177 return _get_castep_version(castep_command, temp_dir)

2178

2179

2180def _get_castep_version(castep_command, temp_dir):

2181 jname = 'dummy_jobname'

2182 stdout, stderr = '', ''

2183 fallback_version = 16. # CASTEP 16.0 and 16.1 report version wrongly

2184 try:

2185 stdout, stderr = subprocess.Popen(

2186 castep_command.split() + ['--version'],

2187 stderr=subprocess.PIPE,

2188 stdout=subprocess.PIPE, cwd=temp_dir,

2189 universal_newlines=True).communicate()

2190 if 'CASTEP version' not in stdout:

2191 stdout, stderr = subprocess.Popen(

2192 castep_command.split() + [jname],

2193 stderr=subprocess.PIPE,

2194 stdout=subprocess.PIPE, cwd=temp_dir,

2195 universal_newlines=True).communicate()

2196 except Exception: # XXX Which kind of exception?

2197 msg = ''

2198 msg += 'Could not determine the version of your CASTEP binary \n'

2199 msg += 'This usually means one of the following \n'

2200 msg += ' * you do not have CASTEP installed \n'

2201 msg += ' * you have not set the CASTEP_COMMAND to call it \n'

2202 msg += ' * you have provided a wrong CASTEP_COMMAND. \n'

2203 msg += ' Make sure it is in your PATH\n\n'

2204 msg += stdout

2205 msg += stderr

2206 raise CastepVersionError(msg)

2207 if 'CASTEP version' in stdout:

2208 output_txt = stdout.split('\n')

2209 version_re = re.compile(r'CASTEP version:\s*([0-9\.]*)')

2210 else:

2211 with open(os.path.join(temp_dir, f'{jname}.castep')) as output:

2212 output_txt = output.readlines()

2213 version_re = re.compile(r'(?<=CASTEP version )[0-9.]*')

2214 # shutil.rmtree(temp_dir)

2215 for line in output_txt:

2216 if 'CASTEP version' in line:

2217 try:

2218 return float(version_re.findall(line)[0])

2219 except ValueError:

2220 # Fallback for buggy --version on CASTEP 16.0, 16.1

2221 return fallback_version

2222

2223

2224def create_castep_keywords(castep_command, filename='castep_keywords.json',

2225 force_write=True, path='.', fetch_only=None):

2226 """This function allows to fetch all available keywords from stdout

2227 of an installed castep binary. It furthermore collects the documentation

2228 to harness the power of (ipython) inspection and type for some basic

2229 type checking of input. All information is stored in a JSON file that is

2230 not distributed by default to avoid breaking the license of CASTEP.

2231 """

2232 # Takes a while ...

2233 # Fetch all allowed parameters

2234 # fetch_only : only fetch that many parameters (for testsuite only)

2235 suffixes = ['cell', 'param']

2236

2237 filepath = os.path.join(path, filename)

2238

2239 if os.path.exists(filepath) and not force_write:

2240 warnings.warn('CASTEP Options Module file exists. '

2241 'You can overwrite it by calling '

2242 'python castep.py -f [CASTEP_COMMAND].')

2243 return False

2244

2245 # Not saving directly to file her to prevent half-generated files

2246 # which will cause problems on future runs

2247

2248 castep_version = get_castep_version(castep_command)

2249

2250 help_all, _ = shell_stdouterr(f'{castep_command} -help all')

2251

2252 # Filter out proper keywords

2253 try:

2254 # The old pattern does not math properly as in CASTEP as of v8.0 there

2255 # are some keywords for the semi-empircal dispersion correction (SEDC)

2256 # which also include numbers.

2257 if castep_version < 7.0:

2258 pattern = r'((?<=^ )[A-Z_]{2,}|(?<=^)[A-Z_]{2,})'

2259 else:

2260 pattern = r'((?<=^ )[A-Z_\d]{2,}|(?<=^)[A-Z_\d]{2,})'

2261

2262 raw_options = re.findall(pattern, help_all, re.MULTILINE)

2263 except Exception:

2264 warnings.warn(f'Problem parsing: {help_all}')

2265 raise

2266

2267 types = set()

2268 levels = set()

2269

2270 processed_n = 0

2271 to_process = len(raw_options[:fetch_only])

2272

2273 processed_options = {sf: {} for sf in suffixes}

2274

2275 for o_i, option in enumerate(raw_options[:fetch_only]):

2276 doc, _ = shell_stdouterr(f'{castep_command} -help {option}')

2277

2278 # Stand Back! I know regular expressions (http://xkcd.com/208/) :-)

2279 match = re.match(r'(?P<before_type>.*)Type: (?P<type>.+?)\s+'

2280 + r'Level: (?P<level>[^ ]+)\n\s*\n'

2281 + r'(?P<doc>.*?)(\n\s*\n|$)', doc, re.DOTALL)

2282

2283 processed_n += 1

2284

2285 if match is not None:

2286 match = match.groupdict()

2287

2288 # JM: uncomment lines in following block to debug issues

2289 # with keyword assignment during extraction process from CASTEP

2290 suffix = None

2291 if re.findall(r'PARAMETERS keywords:\n\n\s?None found', doc):

2292 suffix = 'cell'

2293 if re.findall(r'CELL keywords:\n\n\s?None found', doc):

2294 suffix = 'param'

2295 if suffix is None:

2296 warnings.warn('%s -> not assigned to either'

2297 ' CELL or PARAMETERS keywords' % option)

2298

2299 option = option.lower()

2300 mtyp = match.get('type', None)

2301 mlvl = match.get('level', None)

2302 mdoc = match.get('doc', None)

2303

2304 if mtyp is None:

2305 warnings.warn(f'Found no type for {option}')

2306 continue

2307 if mlvl is None:

2308 warnings.warn(f'Found no level for {option}')

2309 continue

2310 if mdoc is None:

2311 warnings.warn(f'Found no doc string for {option}')

2312 continue

2313

2314 types = types.union([mtyp])

2315 levels = levels.union([mlvl])

2316

2317 processed_options[suffix][option] = {

2318 'keyword': option,

2319 'option_type': mtyp,

2320 'level': mlvl,

2321 'docstring': mdoc

2322 }

2323

2324 processed_n += 1

2325

2326 frac = (o_i + 1.0) / to_process

2327 sys.stdout.write('\rProcessed: [{}] {:>3.0f}%'.format(

2328 '#' * int(frac * 20) + ' '

2329 * (20 - int(frac * 20)),

2330 100 * frac))

2331 sys.stdout.flush()

2332

2333 else:

2334 warnings.warn(f'create_castep_keywords: Could not process {option}')

2335

2336 sys.stdout.write('\n')

2337 sys.stdout.flush()

2338

2339 processed_options['types'] = list(types)

2340 processed_options['levels'] = list(levels)

2341 processed_options['castep_version'] = castep_version

2342

2343 json.dump(processed_options, open(filepath, 'w'), indent=4)

2344

2345 warnings.warn(f'CASTEP v{castep_version}, fetched {processed_n} keywords')

2346 return True

2347

2348

2349CastepKeywords = namedtuple('CastepKeywords',

2350 ['CastepParamDict', 'CastepCellDict',

2351 'types', 'levels', 'castep_version'])

2352

2353# We keep this just for naming consistency with older versions

2354

2355

2356def make_cell_dict(data=None):

2357 from ase.io.castep.castep_input_file import CastepOptionDict

2358

2359 data = data if data is not None else {}

2360

2361 class CastepCellDict(CastepOptionDict):

2362 def __init__(self):

2363 CastepOptionDict.__init__(self, data)

2364

2365 return CastepCellDict

2366

2367

2368def make_param_dict(data=None):

2369 from ase.io.castep.castep_input_file import CastepOptionDict

2370

2371 data = data if data is not None else {}

2372

2373 class CastepParamDict(CastepOptionDict):

2374 def __init__(self):

2375 CastepOptionDict.__init__(self, data)

2376

2377 return CastepParamDict

2378

2379

2380class CastepVersionError(Exception):

2381 """No special behaviour, works to signal when Castep can not be found"""

2382

2383

2384def get_castep_pp_path(castep_pp_path=''):

2385 """Abstract the quest for a CASTEP PSP directory."""

2386 if castep_pp_path:

2387 return os.path.abspath(os.path.expanduser(castep_pp_path))

2388 elif 'PSPOT_DIR' in cfg:

2389 return cfg['PSPOT_DIR']

2390 elif 'CASTEP_PP_PATH' in cfg:

2391 return cfg['CASTEP_PP_PATH']

2392 else:

2393 return os.path.abspath('.')

2394

2395

2396def get_castep_command(castep_command=''):

2397 """Abstract the quest for a castep_command string."""

2398 if castep_command:

2399 return castep_command

2400 elif 'CASTEP_COMMAND' in cfg:

2401 return cfg['CASTEP_COMMAND']

2402 else:

2403 return 'castep'

2404

2405

2406def shell_stdouterr(raw_command, cwd=None):

2407 """Abstracts the standard call of the commandline, when

2408 we are only interested in the stdout and stderr

2409 """

2410 stdout, stderr = subprocess.Popen(raw_command,

2411 stdout=subprocess.PIPE,

2412 stderr=subprocess.PIPE,

2413 universal_newlines=True,

2414 shell=True, cwd=cwd).communicate()

2415 return stdout.strip(), stderr.strip()

2416

2417

2418def import_castep_keywords(castep_command='',

2419 filename='castep_keywords.json',

2420 path='.'):

2421 """Search for castep keywords JSON in multiple paths"""

2422

2423 config_paths = ('~/.ase', '~/.config/ase')

2424 searchpaths = [path] + [os.path.expanduser(config_path)

2425 for config_path in config_paths]

2426 try:

2427 keywords_file = sum(

2428 (glob.glob(os.path.join(sp, filename)) for sp in searchpaths), []

2429 )[0]

2430 except IndexError:

2431 warnings.warn("""Generating CASTEP keywords JSON file... hang on.

2432 The CASTEP keywords JSON file contains abstractions for CASTEP input

2433 parameters (for both .cell and .param input files), including some

2434 format checks and descriptions. The latter are extracted from the

2435 internal online help facility of a CASTEP binary, thus allowing to

2436 easily keep the calculator synchronized with (different versions of)

2437 the CASTEP code. Consequently, avoiding licensing issues (CASTEP is

2438 distributed commercially by Biovia), we consider it wise not to

2439 provide the file in the first place.""")

2440 create_castep_keywords(get_castep_command(castep_command),

2441 filename=filename, path=path)

2442 keywords_file = Path(path).absolute() / filename

2443

2444 warnings.warn(

2445 f'Stored castep keywords dictionary as {keywords_file}. '

2446 f'Copy it to {Path(config_paths[0]).expanduser() / filename} for '

2447 r'user installation.')

2448

2449 # Now create the castep_keywords object proper

2450 with open(keywords_file) as fd:

2451 kwdata = json.load(fd)

2452

2453 # This is a bit awkward, but it's necessary for backwards compatibility

2454 param_dict = make_param_dict(kwdata['param'])

2455 cell_dict = make_cell_dict(kwdata['cell'])

2456

2457 castep_keywords = CastepKeywords(param_dict, cell_dict,

2458 kwdata['types'], kwdata['levels'],

2459 kwdata['castep_version'])

2460

2461 return castep_keywords

2462

2463

2464if __name__ == '__main__':

2465 warnings.warn(

2466 'When called directly this calculator will fetch all available '

2467 'keywords from the binarys help function into a '

2468 'castep_keywords.json in the current directory %s '

2469 'For system wide usage, it can be copied into an ase installation '

2470 'at ASE/calculators. '

2471 'This castep_keywords.json usually only needs to be generated once '

2472 'for a CASTEP binary/CASTEP version.' % os.getcwd())

2473

2474 import optparse

2475 parser = optparse.OptionParser()

2476 parser.add_option(

2477 '-f', '--force-write', dest='force_write',

2478 help='Force overwriting existing castep_keywords.json', default=False,

2479 action='store_true')

2480 (options, args) = parser.parse_args()

2481

2482 if args:

2483 opt_castep_command = ''.join(args)

2484 else:

2485 opt_castep_command = ''

2486 generated = create_castep_keywords(get_castep_command(opt_castep_command),

2487 force_write=options.force_write)

2488

2489 if generated:

2490 try:

2491 with open('castep_keywords.json') as fd:

2492 json.load(fd)

2493 except Exception as e:

2494 warnings.warn(

2495 f'{e} Ooops, something went wrong with the CASTEP keywords')

2496 else:

2497 warnings.warn('Import works. Looking good!')