5.转换lammps的轨迹文件为XDATCAR
最新方法,逐个转换
import numpy as np
import time
def timer(func):
def wrapper(*args, **kwargs):
start_time = time.time()
result = func(*args, **kwargs)
end_time = time.time()
print(f"Time elapsed: {end_time - start_time:.4f} seconds")
return result
return wrapper
@timer
def convert_lmp_trj_to_xdatcar_streaming(
input_filename: str,
output_filename: str,
sample_interval: int = 10
):
"""
逐帧读取 LAMMPS dump 文件 (lammpstrj),每 sample_interval 帧采样一次,并即时写入 XDATCAR 格式的输出文件。
- input_filename: LAMMPS trajectory 文件路径
- output_filename: 输出文件路径 (XDATCAR)
- sample_interval: 采样间隔,例如 10 表示只处理第 10, 20, 30...帧
"""
# 打开输入输出文件
with open(input_filename, 'r') as fin, open(output_filename, 'w') as fout:
frame_index = 0 # 帧计数,从 1 开始计数时先 +1
sample_count = 0 # 已经写出的采样帧计数,用于 Direct configuration 序号
# 以下变量用于在循环中暂存 header info
while True:
line = fin.readline()
if not line:
break # 文件末尾,结束
# 寻找“ITEM: NUMBER OF ATOMS”行,表示新帧开始
if line.strip().startswith("ITEM: NUMBER OF ATOMS"):
# 读取原子数
line_n = fin.readline()
n_atoms = int(line_n)
frame_index += 1
# 解析 BOX BOUNDS
# 假定接下来一行是 BOX BOUNDS header,例如 "ITEM: BOX BOUNDS pp pp pp"
box_header = fin.readline()
# 读取 3 行 bounds
bounds_list = []
for i in range(3):
bound_line = fin.readline().strip().split()
floats = [float(x) for x in bound_line]
bounds_list.append(floats)
#有了晶格常数可以将笛卡尔坐标转换为分数坐标了
bounds_lattice = parse_triclinic_bounds(bounds_list)
atoms_header_line = fin.readline()
parts = atoms_header_line.strip().split()
col_names = parts[2:]
n_cols = len(col_names)
# 确定坐标列索引(x, y, z)
name2idx = {name: idx for idx, name in enumerate(col_names)}
if all(k in name2idx for k in ('x','y','z')):
coord_cols = [name2idx['x'], name2idx['y'], name2idx['z']]
else:
# 尝试 'xs','ys','zs'
if all(k in name2idx for k in ('xs','ys','zs')):
coord_cols = [name2idx['xs'], name2idx['ys'], name2idx['zs']]
else:
# 如果无法自动识别,可以抛错或手动指定
raise ValueError(f"无法自动识别坐标列 'x,y,z' 或 'xs,ys,zs',ATOMS header: {col_names}")
id_col = name2idx['id']
element_col = name2idx['element']
###可以用添加符号映射的方法来处理输出为元素名称时的情况
# 读取 n_atoms 行原子数据
# 为了节省内存,只在采样帧需要时才转换为数组;否则只读掉
atom_lines = []
if frame_index % sample_interval == 0:
for _ in range(n_atoms):
l = fin.readline()
atom_lines.append(l.strip())
else:
# 非采样帧:只读掉,不保存
for _ in range(n_atoms):
fin.readline()
if frame_index % sample_interval == 0:
sample_count += 1
arr = np.array([row.split() for row in atom_lines], dtype=float) # shape (n_atoms, n_cols)
# 排序
sort_idx = np.lexsort((arr[:,id_col],arr[:,element_col]))
arr = arr[sort_idx]
# 每帧按 id和element 列排序
coords = arr[:, coord_cols] # shape (n_atoms, 3)
# if sample_count ==1:
# fout.write("Generated from Lammps\n")
# fout.write("1.0\n")
# np.savetxt(fout,bounds_lattice,fmt='%15.10f')
# fout.write("K Y Si O\n")
# _, counts = np.unique(arr[:, element_col], return_counts=True)
# fout.write(' '.join(map(str, counts)))
# fout.write('\n')
fout.write(f'Direct configuration = {sample_count}\n')
np.savetxt(fout, coords, fmt='%.10f', delimiter=' ')
# 继续下一帧
# else: 如果不是 NUMBER OF ATOMS 行,就继续读下一行,直至文件末尾
# 结束 while
print(f"总共读取帧数: {frame_index},写出采样帧数: {sample_count}")
def parse_triclinic_bounds(bounds_list):
#这里不太对,还得改改
a = bounds_list[0][1] - bounds_list[0][0]
b = bounds_list[1][1] - bounds_list[1][0]
c = bounds_list[2][1] - bounds_list[2][0]
xy = bounds_list[0][2] # 倾斜参数
xz = bounds_list[1][2]
yz = bounds_list[2][2]
return np.array([
[a, xy, xz],
[0, b, yz],
[0, 0, c]
])
if __name__ == '__main__':
input_file = "A.lammpstrj"
output_file = "XDATCAR"
sample_interval = 10 # 每隔 10 帧采样一次
convert_lmp_trj_to_xdatcar_streaming(
input_filename=input_file,
output_filename=output_file,
sample_interval=sample_interval
)
from ase.io import read, write
from sys import argv
if __name__ == '__main__':
# lammps strj
strj_file = argv[1]
write_dir = argv[2]
print(f"From {strj_file} to {write_dir}")
data = read(strj_file, index=":")
if not os.path.exists(write_dir):
os.mkdir(write_dir)
for i in range(len(data)):
poscar_name = os.path.join(write_dir, str(i) + ".vasp")
write(poscar_name, data[i])
另一种方法:需要搞到晶格参数(ovito或者上面的脚本)
import numpy as np
import time
def timer(func):
def wrapper(*args, **kwargs):
start_time = time.time()
result = func(*args, **kwargs)
end_time = time.time()
print(f"Time elapsed: {end_time - start_time:.4f} seconds")
return result
return wrapper
@timer
def read_lmp_trj(file_name: str):
f = open(file_name, 'r')
"""data segment"""
n_atoms_list = []
lat = []
pos = []
num1 = 0
#跳过文件的行数,需要修改
num_lines_to_skip = 0
current_line_num = 0
"""temp"""
# n_atoms = None
while True:
line = f.readline()
if not line:
break
current_line_num += 1
if current_line_num <= num_lines_to_skip:
continue
if "NUMBER OF ATOMS" in line:
line = f.readline()
n_atoms = int(line)
n_atoms_list.append(n_atoms)
f.readline()
"""lat"""
for i in range(3):
lines = f.readline()
lat.append(lines)
f.readline()
"""pos"""
for i in range(n_atoms):
pos.append(f.readline())
#每隔1000个时间间隔读取一次 需要改
num1 +=1
"""post treatment"""
#固定一个三维数组的两个维度,第三个维度也固定了,numpy的逻辑应该是3-2-1,第三个维度先排布,依次向前,第一个是深度
lat = np.array([i.split() for i in lat]).astype(float).reshape(-1, 3, 3)
#需要看这里的8需不需要改
pos = np.array([i.split() for i in pos]).astype(float).reshape(len(lat), -1, 8)
# idx = pos[:, :, 0].argsort(1)
#lexsort的顺序是从最后一个传入的参数开始,依次到第一个传入的参数
idx =np.lexsort((pos[:,:,0],pos[:,:,1]))
sort_pos = pos[np.arange(len(pos))[:, None], idx, :]
start3 = sort_pos[:,:,0:2].reshape(-1,2)
n_atoms_list = np.array(n_atoms_list)
return n_atoms_list, lat, sort_pos[:, :, [2, 3, 4]],start3
if __name__ == '__main__':
file_name = "A.lammpstrj"
print("Loading lmp trajectory")
n_atoms, _, x,stc = read_lmp_trj(file_name=file_name)
#np.savetxt('first_element.txt', x[0])
#np.savetxt('end_element.txt',x[-1])
#30这里需要修改,是移动的原子个数
with open('XDATCAR', 'w') as f:
for i, element in enumerate(x):
f.write(f'Direct configuration = {i + 1}\n') # 注意这里的 i + 1
np.savetxt(f, element, fmt='%.10f', delimiter=' ')
转载请注明来源 有问题可通过github提交issue