Coverage for /builds/hweiske/ase/ase/io/extxyz.py: 81.70%

519 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-22 11:22 +0000

1""" 

2Extended XYZ support 

3 

4Read/write files in "extended" XYZ format, storing additional 

5per-configuration information as key-value pairs on the XYZ 

6comment line, and additional per-atom properties as extra columns. 

7 

8Contributed by James Kermode <james.kermode@gmail.com> 

9""" 

10import json 

11import numbers 

12import re 

13import warnings 

14from io import StringIO, UnsupportedOperation 

15 

16import numpy as np 

17 

18from ase.atoms import Atoms 

19from ase.calculators.singlepoint import SinglePointCalculator 

20from ase.constraints import FixAtoms, FixCartesian 

21from ase.io.formats import index2range 

22from ase.io.utils import ImageIterator 

23from ase.outputs import ArrayProperty, all_outputs 

24from ase.spacegroup.spacegroup import Spacegroup 

25from ase.stress import voigt_6_to_full_3x3_stress 

26from ase.utils import reader, writer 

27 

28__all__ = ['read_xyz', 'write_xyz', 'iread_xyz'] 

29 

30PROPERTY_NAME_MAP = {'positions': 'pos', 

31 'numbers': 'Z', 

32 'charges': 'charge', 

33 'symbols': 'species'} 

34 

35REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(), 

36 PROPERTY_NAME_MAP.keys())) 

37 

38KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)' 

39 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*') 

40KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*=' 

41 + r'\s*([^\s]+)\s*') 

42KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*') 

43 

44UNPROCESSED_KEYS = {'uid'} 

45 

46SPECIAL_3_3_KEYS = {'Lattice', 'virial', 'stress'} 

47 

48# select subset of properties that are not per-atom 

49per_config_properties = [key for key, val in all_outputs.items() 

50 if not (isinstance(val, ArrayProperty) and 

51 val.shapespec[0] == 'natoms')] 

52 

53 

54def key_val_str_to_dict(string, sep=None): 

55 """ 

56 Parse an xyz properties string in a key=value and return a dict with 

57 various values parsed to native types. 

58 

59 Accepts brackets or quotes to delimit values. Parses integers, floats 

60 booleans and arrays thereof. Arrays with 9 values whose name is listed 

61 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering. 

62 

63 If sep is None, string will split on whitespace, otherwise will split 

64 key value pairs with the given separator. 

65 

66 """ 

67 # store the closing delimiters to match opening ones 

68 delimiters = { 

69 "'": "'", 

70 '"': '"', 

71 '{': '}', 

72 '[': ']' 

73 } 

74 

75 # Make pairs and process afterwards 

76 kv_pairs = [ 

77 [[]]] # List of characters for each entry, add a new list for new value 

78 cur_delimiter = None # push and pop closing delimiters 

79 escaped = False # add escaped sequences verbatim 

80 

81 # parse character-by-character unless someone can do nested brackets 

82 # and escape sequences in a regex 

83 for char in string.strip(): 

84 if escaped: # bypass everything if escaped 

85 kv_pairs[-1][-1].append(char) 

86 escaped = False 

87 elif char == '\\': # escape the next thing 

88 escaped = True 

89 elif cur_delimiter: # inside brackets 

90 if char == cur_delimiter: # found matching delimiter 

91 cur_delimiter = None 

92 else: 

93 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim 

94 elif char in delimiters: 

95 cur_delimiter = delimiters[char] # brackets or quotes 

96 elif (sep is None and char.isspace()) or char == sep: 

97 if kv_pairs == [[[]]]: # empty, beginning of string 

98 continue 

99 elif kv_pairs[-1][-1] == []: 

100 continue 

101 else: 

102 kv_pairs.append([[]]) 

103 elif char == '=': 

104 if kv_pairs[-1] == [[]]: 

105 del kv_pairs[-1] 

106 kv_pairs[-1].append([]) # value 

107 else: 

108 kv_pairs[-1][-1].append(char) 

109 

110 kv_dict = {} 

111 

112 for kv_pair in kv_pairs: 

113 if len(kv_pair) == 0: # empty line 

114 continue 

115 elif len(kv_pair) == 1: # default to True 

116 key, value = ''.join(kv_pair[0]), 'T' 

117 else: # Smush anything else with kv-splitter '=' between them 

118 key, value = ''.join(kv_pair[0]), '='.join( 

119 ''.join(x) for x in kv_pair[1:]) 

120 

121 if key.lower() not in UNPROCESSED_KEYS: 

122 # Try to convert to (arrays of) floats, ints 

123 split_value = re.findall(r'[^\s,]+', value) 

124 try: 

125 try: 

126 numvalue = np.array(split_value, dtype=int) 

127 except (ValueError, OverflowError): 

128 # don't catch errors here so it falls through to bool 

129 numvalue = np.array(split_value, dtype=float) 

130 if len(numvalue) == 1: 

131 numvalue = numvalue[0] # Only one number 

132 value = numvalue 

133 except (ValueError, OverflowError): 

134 pass # value is unchanged 

135 

136 # convert special 3x3 matrices 

137 if key in SPECIAL_3_3_KEYS: 

138 if not isinstance(value, np.ndarray) or value.shape != (9,): 

139 raise ValueError("Got info item {}, expecting special 3x3 " 

140 "matrix, but value is not in the form of " 

141 "a 9-long numerical vector".format(key)) 

142 value = np.array(value).reshape((3, 3), order='F') 

143 

144 # parse special strings as boolean or JSON 

145 if isinstance(value, str): 

146 # Parse boolean values: 

147 # T or [tT]rue or TRUE -> True 

148 # F or [fF]alse or FALSE -> False 

149 # For list: 'T T F' -> [True, True, False] 

150 # Cannot use `.lower()` to reduce `str_to_bool` mapping because 

151 # 't'/'f' not accepted 

152 str_to_bool = { 

153 'T': True, 'F': False, 'true': True, 'false': False, 

154 'True': True, 'False': False, 'TRUE': True, 'FALSE': False 

155 } 

156 try: 

157 boolvalue = [str_to_bool[vpart] for vpart in 

158 re.findall(r'[^\s,]+', value)] 

159 

160 if len(boolvalue) == 1: 

161 value = boolvalue[0] 

162 else: 

163 value = boolvalue 

164 except KeyError: 

165 # Try to parse JSON 

166 if value.startswith("_JSON "): 

167 d = json.loads(value.replace("_JSON ", "", 1)) 

168 value = np.array(d) 

169 if value.dtype.kind not in ['i', 'f', 'b']: 

170 value = d 

171 

172 kv_dict[key] = value 

173 

174 return kv_dict 

175 

176 

177def key_val_str_to_dict_regex(s): 

178 """ 

179 Parse strings in the form 'key1=value1 key2="quoted value"' 

180 """ 

181 d = {} 

182 s = s.strip() 

183 while True: 

184 # Match quoted string first, then fall through to plain key=value 

185 m = KEY_QUOTED_VALUE.match(s) 

186 if m is None: 

187 m = KEY_VALUE.match(s) 

188 if m is not None: 

189 s = KEY_VALUE.sub('', s, 1) 

190 else: 

191 # Just a key with no value 

192 m = KEY_RE.match(s) 

193 if m is not None: 

194 s = KEY_RE.sub('', s, 1) 

195 else: 

196 s = KEY_QUOTED_VALUE.sub('', s, 1) 

197 

198 if m is None: 

199 break # No more matches 

200 

201 key = m.group(1) 

202 try: 

203 value = m.group(2) 

204 except IndexError: 

205 # default value is 'T' (True) 

206 value = 'T' 

207 

208 if key.lower() not in UNPROCESSED_KEYS: 

209 # Try to convert to (arrays of) floats, ints 

210 try: 

211 numvalue = [] 

212 for x in value.split(): 

213 if x.find('.') == -1: 

214 numvalue.append(int(float(x))) 

215 else: 

216 numvalue.append(float(x)) 

217 if len(numvalue) == 1: 

218 numvalue = numvalue[0] # Only one number 

219 elif len(numvalue) == 9: 

220 # special case: 3x3 matrix, fortran ordering 

221 numvalue = np.array(numvalue).reshape((3, 3), order='F') 

222 else: 

223 numvalue = np.array(numvalue) # vector 

224 value = numvalue 

225 except (ValueError, OverflowError): 

226 pass 

227 

228 # Parse boolean values: 'T' -> True, 'F' -> False, 

229 # 'T T F' -> [True, True, False] 

230 if isinstance(value, str): 

231 str_to_bool = {'T': True, 'F': False} 

232 

233 if len(value.split()) > 1: 

234 if all(x in str_to_bool for x in value.split()): 

235 value = [str_to_bool[x] for x in value.split()] 

236 elif value in str_to_bool: 

237 value = str_to_bool[value] 

238 

239 d[key] = value 

240 

241 return d 

242 

243 

244def escape(string): 

245 if (' ' in string or 

246 '"' in string or "'" in string or 

247 '{' in string or '}' in string or 

248 '[' in string or ']' in string): 

249 string = string.replace('"', '\\"') 

250 string = f'"{string}"' 

251 return string 

252 

253 

254def key_val_dict_to_str(dct, sep=' '): 

255 """ 

256 Convert atoms.info dictionary to extended XYZ string representation 

257 """ 

258 

259 def array_to_string(key, val): 

260 # some ndarrays are special (special 3x3 keys, and scalars/vectors of 

261 # numbers or bools), handle them here 

262 if key in SPECIAL_3_3_KEYS: 

263 # special 3x3 matrix, flatten in Fortran order 

264 val = val.reshape(val.size, order='F') 

265 if val.dtype.kind in ['i', 'f', 'b']: 

266 # numerical or bool scalars/vectors are special, for backwards 

267 # compat. 

268 if len(val.shape) == 0: 

269 # scalar 

270 val = str(known_types_to_str(val)) 

271 elif len(val.shape) == 1: 

272 # vector 

273 val = ' '.join(str(known_types_to_str(v)) for v in val) 

274 return val 

275 

276 def known_types_to_str(val): 

277 if isinstance(val, (bool, np.bool_)): 

278 return 'T' if val else 'F' 

279 elif isinstance(val, numbers.Real): 

280 return f'{val}' 

281 elif isinstance(val, Spacegroup): 

282 return val.symbol 

283 else: 

284 return val 

285 

286 if len(dct) == 0: 

287 return '' 

288 

289 string = '' 

290 for key in dct: 

291 val = dct[key] 

292 

293 if isinstance(val, np.ndarray): 

294 val = array_to_string(key, val) 

295 else: 

296 # convert any known types to string 

297 val = known_types_to_str(val) 

298 

299 if val is not None and not isinstance(val, str): 

300 # what's left is an object, try using JSON 

301 if isinstance(val, np.ndarray): 

302 val = val.tolist() 

303 try: 

304 val = '_JSON ' + json.dumps(val) 

305 # if this fails, let give up 

306 except TypeError: 

307 warnings.warn('Skipping unhashable information ' 

308 '{}'.format(key)) 

309 continue 

310 

311 key = escape(key) # escape and quote key 

312 eq = "=" 

313 # Should this really be setting empty value that's going to be 

314 # interpreted as bool True? 

315 if val is None: 

316 val = "" 

317 eq = "" 

318 val = escape(val) # escape and quote val 

319 

320 string += f'{key}{eq}{val}{sep}' 

321 

322 return string.strip() 

323 

324 

325def parse_properties(prop_str): 

326 """ 

327 Parse extended XYZ properties format string 

328 

329 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3". 

330 NAME is the name of the property. 

331 TYPE is one of R, I, S, L for real, integer, string and logical. 

332 NCOLS is number of columns for that property. 

333 """ 

334 

335 properties = {} 

336 properties_list = [] 

337 dtypes = [] 

338 converters = [] 

339 

340 fields = prop_str.split(':') 

341 

342 def parse_bool(x): 

343 """ 

344 Parse bool to string 

345 """ 

346 return {'T': True, 'F': False, 

347 'True': True, 'False': False}.get(x) 

348 

349 fmt_map = {'R': ('d', float), 

350 'I': ('i', int), 

351 'S': (object, str), 

352 'L': ('bool', parse_bool)} 

353 

354 for name, ptype, cols in zip(fields[::3], 

355 fields[1::3], 

356 [int(x) for x in fields[2::3]]): 

357 if ptype not in ('R', 'I', 'S', 'L'): 

358 raise ValueError('Unknown property type: ' + ptype) 

359 ase_name = REV_PROPERTY_NAME_MAP.get(name, name) 

360 

361 dtype, converter = fmt_map[ptype] 

362 if cols == 1: 

363 dtypes.append((name, dtype)) 

364 converters.append(converter) 

365 else: 

366 for c in range(cols): 

367 dtypes.append((name + str(c), dtype)) 

368 converters.append(converter) 

369 

370 properties[name] = (ase_name, cols) 

371 properties_list.append(name) 

372 

373 dtype = np.dtype(dtypes) 

374 return properties, properties_list, dtype, converters 

375 

376 

377def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict, 

378 nvec=0): 

379 # comment line 

380 line = next(lines).strip() 

381 if nvec > 0: 

382 info = {'comment': line} 

383 else: 

384 info = properties_parser(line) if line else {} 

385 

386 pbc = None 

387 if 'pbc' in info: 

388 pbc = info.pop('pbc') 

389 elif 'Lattice' in info: 

390 # default pbc for extxyz file containing Lattice 

391 # is True in all directions 

392 pbc = [True, True, True] 

393 elif nvec > 0: 

394 # cell information given as pseudo-Atoms 

395 pbc = [False, False, False] 

396 

397 cell = None 

398 if 'Lattice' in info: 

399 # NB: ASE cell is transpose of extended XYZ lattice 

400 cell = info['Lattice'].T 

401 del info['Lattice'] 

402 elif nvec > 0: 

403 # cell information given as pseudo-Atoms 

404 cell = np.zeros((3, 3)) 

405 

406 if 'Properties' not in info: 

407 # Default set of properties is atomic symbols and positions only 

408 info['Properties'] = 'species:S:1:pos:R:3' 

409 properties, names, dtype, convs = parse_properties(info['Properties']) 

410 del info['Properties'] 

411 

412 data = [] 

413 for _ in range(natoms): 

414 try: 

415 line = next(lines) 

416 except StopIteration: 

417 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}' 

418 .format(len(data), natoms)) 

419 vals = line.split() 

420 row = tuple(conv(val) for conv, val in zip(convs, vals)) 

421 data.append(row) 

422 

423 try: 

424 data = np.array(data, dtype) 

425 except TypeError: 

426 raise XYZError('Badly formatted data ' 

427 'or end of file reached before end of frame') 

428 

429 # Read VEC entries if present 

430 if nvec > 0: 

431 for ln in range(nvec): 

432 try: 

433 line = next(lines) 

434 except StopIteration: 

435 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, ' 

436 'expected {}'.format(len(cell), nvec)) 

437 entry = line.split() 

438 

439 if not entry[0].startswith('VEC'): 

440 raise XYZError(f'Expected cell vector, got {entry[0]}') 

441 

442 try: 

443 n = int(entry[0][3:]) 

444 except ValueError as e: 

445 raise XYZError('Expected VEC{}, got VEC{}' 

446 .format(ln + 1, entry[0][3:])) from e 

447 if n != ln + 1: 

448 raise XYZError('Expected VEC{}, got VEC{}' 

449 .format(ln + 1, n)) 

450 

451 cell[ln] = np.array([float(x) for x in entry[1:]]) 

452 pbc[ln] = True 

453 if nvec != pbc.count(True): 

454 raise XYZError('Problem with number of cell vectors') 

455 pbc = tuple(pbc) 

456 

457 arrays = {} 

458 for name in names: 

459 ase_name, cols = properties[name] 

460 if cols == 1: 

461 value = data[name] 

462 else: 

463 value = np.vstack([data[name + str(c)] 

464 for c in range(cols)]).T 

465 arrays[ase_name] = value 

466 

467 numbers = arrays.pop('numbers', None) 

468 symbols = arrays.pop('symbols', None) 

469 

470 if symbols is not None: 

471 symbols = [s.capitalize() for s in symbols] 

472 

473 atoms = Atoms(numbers if numbers is not None else symbols, 

474 positions=arrays.pop('positions', None), 

475 charges=arrays.pop('initial_charges', None), 

476 cell=cell, 

477 pbc=pbc, 

478 info=info) 

479 

480 # Read and set constraints 

481 if 'move_mask' in arrays: 

482 move_mask = arrays.pop('move_mask').astype(bool) 

483 if properties['move_mask'][1] == 3: 

484 cons = [] 

485 for a in range(natoms): 

486 cons.append(FixCartesian(a, mask=~move_mask[a, :])) 

487 atoms.set_constraint(cons) 

488 elif properties['move_mask'][1] == 1: 

489 atoms.set_constraint(FixAtoms(mask=~move_mask)) 

490 else: 

491 raise XYZError('Not implemented constraint') 

492 

493 set_calc_and_arrays(atoms, arrays) 

494 return atoms 

495 

496 

497def set_calc_and_arrays(atoms, arrays): 

498 results = {} 

499 

500 for name, array in arrays.items(): 

501 if name in all_outputs: 

502 results[name] = array 

503 else: 

504 atoms.new_array(name, array) 

505 

506 for key in list(atoms.info): 

507 if key in per_config_properties: 

508 results[key] = atoms.info.pop(key) 

509 # special case for stress- convert to Voigt 6-element form 

510 if key == 'stress' and results[key].shape == (3, 3): 

511 stress = results[key] 

512 stress = np.array([stress[0, 0], 

513 stress[1, 1], 

514 stress[2, 2], 

515 stress[1, 2], 

516 stress[0, 2], 

517 stress[0, 1]]) 

518 results[key] = stress 

519 

520 if results: 

521 atoms.calc = SinglePointCalculator(atoms, **results) 

522 

523 

524class XYZError(IOError): 

525 pass 

526 

527 

528class XYZChunk: 

529 def __init__(self, lines, natoms): 

530 self.lines = lines 

531 self.natoms = natoms 

532 

533 def build(self): 

534 """Convert unprocessed chunk into Atoms.""" 

535 return _read_xyz_frame(iter(self.lines), self.natoms) 

536 

537 

538def ixyzchunks(fd): 

539 """Yield unprocessed chunks (header, lines) for each xyz image.""" 

540 while True: 

541 line = next(fd).strip() # Raises StopIteration on empty file 

542 try: 

543 natoms = int(line) 

544 except ValueError: 

545 raise XYZError(f'Expected integer, found "{line}"') 

546 try: 

547 lines = [next(fd) for _ in range(1 + natoms)] 

548 except StopIteration: 

549 raise XYZError('Incomplete XYZ chunk') 

550 yield XYZChunk(lines, natoms) 

551 

552 

553iread_xyz = ImageIterator(ixyzchunks) 

554 

555 

556@reader 

557def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict): 

558 r""" 

559 Read from a file in Extended XYZ format 

560 

561 index is the frame to read, default is last frame (index=-1). 

562 properties_parser is the parse to use when converting the properties line 

563 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can 

564 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly 

565 faster but has fewer features. 

566 

567 Extended XYZ format is an enhanced version of the `basic XYZ format 

568 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra 

569 columns to be present in the file for additonal per-atom properties as 

570 well as standardising the format of the comment line to include the 

571 cell lattice and other per-frame parameters. 

572 

573 It's easiest to describe the format with an example. Here is a 

574 standard XYZ file containing a bulk cubic 8 atom silicon cell :: 

575 

576 8 

577 Cubic bulk silicon cell 

578 Si 0.00000000 0.00000000 0.00000000 

579 Si 1.36000000 1.36000000 1.36000000 

580 Si 2.72000000 2.72000000 0.00000000 

581 Si 4.08000000 4.08000000 1.36000000 

582 Si 2.72000000 0.00000000 2.72000000 

583 Si 4.08000000 1.36000000 4.08000000 

584 Si 0.00000000 2.72000000 2.72000000 

585 Si 1.36000000 4.08000000 4.08000000 

586 

587 The first line is the number of atoms, followed by a comment and 

588 then one line per atom, giving the element symbol and cartesian 

589 x y, and z coordinates in Angstroms. 

590 

591 Here's the same configuration in extended XYZ format :: 

592 

593 8 

594 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0 

595 Si 0.00000000 0.00000000 0.00000000 

596 Si 1.36000000 1.36000000 1.36000000 

597 Si 2.72000000 2.72000000 0.00000000 

598 Si 4.08000000 4.08000000 1.36000000 

599 Si 2.72000000 0.00000000 2.72000000 

600 Si 4.08000000 1.36000000 4.08000000 

601 Si 0.00000000 2.72000000 2.72000000 

602 Si 1.36000000 4.08000000 4.08000000 

603 

604 In extended XYZ format, the comment line is replaced by a series of 

605 key/value pairs. The keys should be strings and values can be 

606 integers, reals, logicals (denoted by `T` and `F` for true and false) 

607 or strings. Quotes are required if a value contains any spaces (like 

608 `Lattice` above). There are two mandatory parameters that any 

609 extended XYZ: `Lattice` and `Properties`. Other parameters -- 

610 e.g. `Time` in the example above --- can be added to the parameter line 

611 as needed. 

612 

613 `Lattice` is a Cartesian 3x3 matrix representation of the cell 

614 vectors, with each vector stored as a column and the 9 values listed in 

615 Fortran column-major order, i.e. in the form :: 

616 

617 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z" 

618 

619 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the 

620 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second 

621 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the 

622 third lattice vector (:math:`\mathbf{c}`). 

623 

624 The list of properties in the file is described by the `Properties` 

625 parameter, which should take the form of a series of colon separated 

626 triplets giving the name, format (`R` for real, `I` for integer) and 

627 number of columns of each property. For example:: 

628 

629 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1" 

630 

631 indicates the first column represents atomic species, the next three 

632 columns represent atomic positions, the next three velcoities, and the 

633 last is an single integer called `select`. With this property 

634 definition, the line :: 

635 

636 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1 

637 

638 would describe a silicon atom at position (4.08,4.08,1.36) with zero 

639 velocity and the `select` property set to 1. 

640 

641 The property names `pos`, `Z`, `mass`, and `charge` map to ASE 

642 :attr:`ase.atoms.Atoms.arrays` entries named 

643 `positions`, `numbers`, `masses` and `charges` respectively. 

644 

645 Additional key-value pairs in the comment line are parsed into the 

646 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions 

647 

648 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are 

649 included to ease command-line usage as the `{}` are not treated 

650 specially by the shell) 

651 - Quotes within keys or values can be escaped with `\"`. 

652 - Keys with special names `stress` or `virial` are treated as 3x3 matrices 

653 in Fortran order, as for `Lattice` above. 

654 - Otherwise, values with multiple elements are treated as 1D arrays, first 

655 assuming integer format and falling back to float if conversion is 

656 unsuccessful. 

657 - A missing value defaults to `True`, e.g. the comment line 

658 `"cutoff=3.4 have_energy"` leads to 

659 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`. 

660 - Value strings starting with `"_JSON"` are interpreted as JSON content; 

661 similarly, when writing, anything which does not match the criteria above 

662 is serialised as JSON. 

663 

664 The extended XYZ format is also supported by the 

665 the `Ovito <https://www.ovito.org>`_ visualisation tool. 

666 """ # noqa: E501 

667 

668 if not isinstance(index, int) and not isinstance(index, slice): 

669 raise TypeError('Index argument is neither slice nor integer!') 

670 

671 # If possible, build a partial index up to the last frame required 

672 last_frame = None 

673 if isinstance(index, int) and index >= 0: 

674 last_frame = index 

675 elif isinstance(index, slice): 

676 if index.stop is not None and index.stop >= 0: 

677 last_frame = index.stop 

678 

679 # scan through file to find where the frames start 

680 try: 

681 fileobj.seek(0) 

682 except UnsupportedOperation: 

683 fileobj = StringIO(fileobj.read()) 

684 fileobj.seek(0) 

685 frames = [] 

686 while True: 

687 frame_pos = fileobj.tell() 

688 line = fileobj.readline() 

689 if line.strip() == '': 

690 break 

691 try: 

692 natoms = int(line) 

693 except ValueError as err: 

694 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}' 

695 .format(err)) 

696 fileobj.readline() # read comment line 

697 for _ in range(natoms): 

698 fileobj.readline() 

699 # check for VEC 

700 nvec = 0 

701 while True: 

702 lastPos = fileobj.tell() 

703 line = fileobj.readline() 

704 if line.lstrip().startswith('VEC'): 

705 nvec += 1 

706 if nvec > 3: 

707 raise XYZError('ase.io.extxyz: More than 3 VECX entries') 

708 else: 

709 fileobj.seek(lastPos) 

710 break 

711 frames.append((frame_pos, natoms, nvec)) 

712 if last_frame is not None and len(frames) > last_frame: 

713 break 

714 

715 trbl = index2range(index, len(frames)) 

716 

717 for index in trbl: 

718 frame_pos, natoms, nvec = frames[index] 

719 fileobj.seek(frame_pos) 

720 # check for consistency with frame index table 

721 assert int(fileobj.readline()) == natoms 

722 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec) 

723 

724 

725def output_column_format(atoms, columns, arrays, write_info=True): 

726 """ 

727 Helper function to build extended XYZ comment line 

728 """ 

729 fmt_map = {'d': ('R', '%16.8f'), 

730 'f': ('R', '%16.8f'), 

731 'i': ('I', '%8d'), 

732 'O': ('S', '%s'), 

733 'S': ('S', '%s'), 

734 'U': ('S', '%-2s'), 

735 'b': ('L', ' %.1s')} 

736 

737 # NB: Lattice is stored as tranpose of ASE cell, 

738 # with Fortran array ordering 

739 lattice_str = ('Lattice="' 

740 + ' '.join([str(x) for x in np.reshape(atoms.cell.T, 

741 9, order='F')]) + 

742 '"') 

743 

744 property_names = [] 

745 property_types = [] 

746 property_ncols = [] 

747 dtypes = [] 

748 formats = [] 

749 

750 for column in columns: 

751 array = arrays[column] 

752 dtype = array.dtype 

753 

754 property_name = PROPERTY_NAME_MAP.get(column, column) 

755 property_type, fmt = fmt_map[dtype.kind] 

756 property_names.append(property_name) 

757 property_types.append(property_type) 

758 

759 if (len(array.shape) == 1 

760 or (len(array.shape) == 2 and array.shape[1] == 1)): 

761 ncol = 1 

762 dtypes.append((column, dtype)) 

763 else: 

764 ncol = array.shape[1] 

765 for c in range(ncol): 

766 dtypes.append((column + str(c), dtype)) 

767 

768 formats.extend([fmt] * ncol) 

769 property_ncols.append(ncol) 

770 

771 props_str = ':'.join([':'.join(x) for x in 

772 zip(property_names, 

773 property_types, 

774 [str(nc) for nc in property_ncols])]) 

775 

776 comment_str = '' 

777 if atoms.cell.any(): 

778 comment_str += lattice_str + ' ' 

779 comment_str += f'Properties={props_str}' 

780 

781 info = {} 

782 if write_info: 

783 info.update(atoms.info) 

784 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions 

785 comment_str += ' ' + key_val_dict_to_str(info) 

786 

787 dtype = np.dtype(dtypes) 

788 fmt = ' '.join(formats) + '\n' 

789 

790 return comment_str, property_ncols, dtype, fmt 

791 

792 

793@writer 

794def write_xyz(fileobj, images, comment='', columns=None, 

795 write_info=True, 

796 write_results=True, plain=False, vec_cell=False): 

797 """ 

798 Write output in extended XYZ format 

799 

800 Optionally, specify which columns (arrays) to include in output, 

801 whether to write the contents of the `atoms.info` dict to the 

802 XYZ comment line (default is True), the results of any 

803 calculator attached to this Atoms. The `plain` argument 

804 can be used to write a simple XYZ file with no additional information. 

805 `vec_cell` can be used to write the cell vectors as additional 

806 pseudo-atoms. 

807 

808 See documentation for :func:`read_xyz()` for further details of the extended 

809 XYZ file format. 

810 """ 

811 

812 if hasattr(images, 'get_positions'): 

813 images = [images] 

814 

815 for atoms in images: 

816 natoms = len(atoms) 

817 

818 if write_results: 

819 calculator = atoms.calc 

820 atoms = atoms.copy() 

821 

822 save_calc_results(atoms, calculator, calc_prefix="") 

823 

824 if atoms.info.get('stress', np.array([])).shape == (6,): 

825 atoms.info['stress'] = \ 

826 voigt_6_to_full_3x3_stress(atoms.info['stress']) 

827 

828 if columns is None: 

829 fr_cols = (['symbols', 'positions'] 

830 + [key for key in atoms.arrays if 

831 key not in ['symbols', 'positions', 'numbers', 

832 'species', 'pos']]) 

833 else: 

834 fr_cols = columns[:] 

835 

836 if vec_cell: 

837 plain = True 

838 

839 if plain: 

840 fr_cols = ['symbols', 'positions'] 

841 write_info = False 

842 write_results = False 

843 

844 # Move symbols and positions to first two properties 

845 if 'symbols' in fr_cols: 

846 i = fr_cols.index('symbols') 

847 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0] 

848 

849 if 'positions' in fr_cols: 

850 i = fr_cols.index('positions') 

851 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1] 

852 

853 # Check first column "looks like" atomic symbols 

854 if fr_cols[0] in atoms.arrays: 

855 symbols = atoms.arrays[fr_cols[0]] 

856 else: 

857 symbols = [*atoms.symbols] 

858 

859 if natoms > 0 and not isinstance(symbols[0], str): 

860 raise ValueError('First column must be symbols-like') 

861 

862 # Check second column "looks like" atomic positions 

863 pos = atoms.arrays[fr_cols[1]] 

864 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

865 raise ValueError('Second column must be position-like') 

866 

867 # if vec_cell add cell information as pseudo-atoms 

868 if vec_cell: 

869 nPBC = 0 

870 for i, b in enumerate(atoms.pbc): 

871 if not b: 

872 continue 

873 nPBC += 1 

874 symbols.append('VEC' + str(nPBC)) 

875 pos = np.vstack((pos, atoms.cell[i])) 

876 # add to natoms 

877 natoms += nPBC 

878 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

879 raise ValueError( 

880 'Pseudo Atoms containing cell have bad coords') 

881 

882 # Move mask 

883 if 'move_mask' in fr_cols: 

884 cnstr = images[0]._get_constraints() 

885 if len(cnstr) > 0: 

886 c0 = cnstr[0] 

887 if isinstance(c0, FixAtoms): 

888 cnstr = np.ones((natoms,), dtype=bool) 

889 for idx in c0.index: 

890 cnstr[idx] = False # cnstr: atoms that can be moved 

891 elif isinstance(c0, FixCartesian): 

892 masks = np.ones((natoms, 3), dtype=bool) 

893 for i in range(len(cnstr)): 

894 idx = cnstr[i].index 

895 masks[idx] = cnstr[i].mask 

896 cnstr = ~masks # cnstr: coordinates that can be moved 

897 else: 

898 fr_cols.remove('move_mask') 

899 

900 # Collect data to be written out 

901 arrays = {} 

902 for column in fr_cols: 

903 if column == 'positions': 

904 arrays[column] = pos 

905 elif column in atoms.arrays: 

906 arrays[column] = atoms.arrays[column] 

907 elif column == 'symbols': 

908 arrays[column] = np.array(symbols) 

909 elif column == 'move_mask': 

910 arrays[column] = cnstr 

911 else: 

912 raise ValueError(f'Missing array "{column}"') 

913 

914 comm, ncols, dtype, fmt = output_column_format(atoms, 

915 fr_cols, 

916 arrays, 

917 write_info) 

918 

919 if plain or comment != '': 

920 # override key/value pairs with user-speficied comment string 

921 comm = comment.rstrip() 

922 if '\n' in comm: 

923 raise ValueError('Comment line should not have line breaks.') 

924 

925 # Pack fr_cols into record array 

926 data = np.zeros(natoms, dtype) 

927 for column, ncol in zip(fr_cols, ncols): 

928 value = arrays[column] 

929 if ncol == 1: 

930 data[column] = np.squeeze(value) 

931 else: 

932 for c in range(ncol): 

933 data[column + str(c)] = value[:, c] 

934 

935 nat = natoms 

936 if vec_cell: 

937 nat -= nPBC 

938 # Write the output 

939 fileobj.write('%d\n' % nat) 

940 fileobj.write(f'{comm}\n') 

941 for i in range(natoms): 

942 fileobj.write(fmt % tuple(data[i])) 

943 

944 

945def save_calc_results(atoms, calc=None, calc_prefix=None, 

946 remove_atoms_calc=False, force=False): 

947 """Update information in atoms from results in a calculator 

948 

949 Args: 

950 atoms (ase.atoms.Atoms): Atoms object, modified in place 

951 calc (ase.calculators.Calculator, optional): calculator to take results 

952 from. Defaults to :attr:`atoms.calc` 

953 calc_prefix (str, optional): String to prefix to results names 

954 in :attr:`atoms.arrays` and :attr:`atoms.info`. Defaults to 

955 calculator class name 

956 remove_atoms_calc (bool): remove the calculator from the `atoms` 

957 object after saving its results. Defaults to `False`, ignored if 

958 `calc` is passed in 

959 force (bool, optional): overwrite existing fields with same name, 

960 default False 

961 """ 

962 if calc is None: 

963 calc_use = atoms.calc 

964 else: 

965 calc_use = calc 

966 

967 if calc_use is None: 

968 return None, None 

969 

970 if calc_prefix is None: 

971 calc_prefix = calc_use.__class__.__name__ + '_' 

972 

973 per_config_results = {} 

974 per_atom_results = {} 

975 for prop, value in calc_use.results.items(): 

976 if prop in per_config_properties: 

977 per_config_results[calc_prefix + prop] = value 

978 else: 

979 per_atom_results[calc_prefix + prop] = value 

980 

981 if not force: 

982 if any(key in atoms.info for key in per_config_results): 

983 raise KeyError("key from calculator already exists in atoms.info") 

984 if any(key in atoms.arrays for key in per_atom_results): 

985 raise KeyError("key from calculator already exists in atoms.arrays") 

986 

987 atoms.info.update(per_config_results) 

988 atoms.arrays.update(per_atom_results) 

989 

990 if remove_atoms_calc and calc is None: 

991 atoms.calc = None 

992 

993 

994# create aliases for read/write functions 

995read_extxyz = read_xyz 

996write_extxyz = write_xyz