转换lammps轨迹为XDATCAR

  1. 5.转换lammps的轨迹文件为XDATCAR

5.转换lammps的轨迹文件为XDATCAR

最新方法,逐个转换

import numpy as np
import time

def timer(func):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        print(f"Time elapsed: {end_time - start_time:.4f} seconds")
        return result

    return wrapper

@timer

def convert_lmp_trj_to_xdatcar_streaming(
    input_filename: str,
    output_filename: str,
    sample_interval: int = 10
):
    """
    逐帧读取 LAMMPS dump 文件 (lammpstrj),每 sample_interval 帧采样一次,并即时写入 XDATCAR 格式的输出文件。
    - input_filename: LAMMPS trajectory 文件路径
    - output_filename: 输出文件路径 (XDATCAR)
    - sample_interval: 采样间隔,例如 10 表示只处理第 10, 20, 30...帧
    """





    # 打开输入输出文件
    with open(input_filename, 'r') as fin, open(output_filename, 'w') as fout:
        frame_index = 0       # 帧计数,从 1 开始计数时先 +1
        sample_count = 0      # 已经写出的采样帧计数,用于 Direct configuration 序号
        # 以下变量用于在循环中暂存 header info
        while True:
            line = fin.readline()
            if not line:
                break  # 文件末尾,结束

            # 寻找“ITEM: NUMBER OF ATOMS”行,表示新帧开始

            if line.strip().startswith("ITEM: NUMBER OF ATOMS"):
                # 读取原子数
                line_n = fin.readline()
                n_atoms = int(line_n)
                frame_index += 1
                # 解析 BOX BOUNDS
                # 假定接下来一行是 BOX BOUNDS header,例如 "ITEM: BOX BOUNDS pp pp pp"
                box_header = fin.readline()
                    # 读取 3 行 bounds
                bounds_list = []
                for i in range(3):
                    bound_line = fin.readline().strip().split()
                    floats = [float(x) for x in bound_line]
                    bounds_list.append(floats)
                #有了晶格常数可以将笛卡尔坐标转换为分数坐标了
                bounds_lattice = parse_triclinic_bounds(bounds_list)
                atoms_header_line = fin.readline()
                parts = atoms_header_line.strip().split()
                col_names = parts[2:]
                n_cols = len(col_names)
                # 确定坐标列索引(x, y, z)
                name2idx = {name: idx for idx, name in enumerate(col_names)}
                if all(k in name2idx for k in ('x','y','z')):
                    coord_cols = [name2idx['x'], name2idx['y'], name2idx['z']]
                else:
                    # 尝试 'xs','ys','zs'
                    if all(k in name2idx for k in ('xs','ys','zs')):
                        coord_cols = [name2idx['xs'], name2idx['ys'], name2idx['zs']]
                    else:
                        # 如果无法自动识别,可以抛错或手动指定
                        raise ValueError(f"无法自动识别坐标列 'x,y,z' 或 'xs,ys,zs',ATOMS header: {col_names}")

                id_col = name2idx['id']
                element_col = name2idx['element']
                ###可以用添加符号映射的方法来处理输出为元素名称时的情况

                # 读取 n_atoms 行原子数据
                # 为了节省内存,只在采样帧需要时才转换为数组;否则只读掉
                atom_lines = []
                if frame_index % sample_interval == 0:
                    for _ in range(n_atoms):
                        l = fin.readline()
                        atom_lines.append(l.strip())
                else:
                    # 非采样帧:只读掉,不保存
                    for _ in range(n_atoms):
                        fin.readline()

                if frame_index % sample_interval == 0:
                    sample_count += 1
                    arr = np.array([row.split() for row in atom_lines], dtype=float)  # shape (n_atoms, n_cols)

                    # 排序
                    sort_idx = np.lexsort((arr[:,id_col],arr[:,element_col]))
                    arr = arr[sort_idx]
                        # 每帧按 id和element 列排序

                    coords = arr[:, coord_cols]  # shape (n_atoms, 3)
                   # if sample_count ==1:
                    #    fout.write("Generated from Lammps\n")
                     #   fout.write("1.0\n")
                     #   np.savetxt(fout,bounds_lattice,fmt='%15.10f')
                     #   fout.write("K Y Si O\n")
                     #   _, counts = np.unique(arr[:, element_col], return_counts=True)
                      #  fout.write(' '.join(map(str, counts)))
                      #  fout.write('\n')
                    fout.write(f'Direct configuration = {sample_count}\n')
                    np.savetxt(fout, coords, fmt='%.10f', delimiter='   ')

                # 继续下一帧
            # else: 如果不是 NUMBER OF ATOMS 行,就继续读下一行,直至文件末尾
        # 结束 while

    print(f"总共读取帧数: {frame_index},写出采样帧数: {sample_count}")


def parse_triclinic_bounds(bounds_list):
    #这里不太对,还得改改
    a = bounds_list[0][1] - bounds_list[0][0]
    b = bounds_list[1][1] - bounds_list[1][0]
    c = bounds_list[2][1] - bounds_list[2][0]
    xy = bounds_list[0][2]  # 倾斜参数
    xz = bounds_list[1][2]
    yz = bounds_list[2][2]
    return np.array([
        [a,  xy, xz],
        [0,   b, yz],
        [0,   0,  c]
    ])





if __name__ == '__main__':
    input_file = "A.lammpstrj"
    output_file = "XDATCAR"
    sample_interval = 10  # 每隔 10 帧采样一次
    convert_lmp_trj_to_xdatcar_streaming(
        input_filename=input_file,
        output_filename=output_file,
        sample_interval=sample_interval
    )
from ase.io import read, write
from sys import argv


if __name__ == '__main__':
    # lammps strj
    strj_file = argv[1]
    write_dir = argv[2]

    print(f"From {strj_file} to {write_dir}")
    data = read(strj_file, index=":")

    if not os.path.exists(write_dir):
        os.mkdir(write_dir)

    for i in range(len(data)):
        poscar_name = os.path.join(write_dir, str(i) + ".vasp")
        write(poscar_name, data[i])

image-20231020154210390

另一种方法:需要搞到晶格参数(ovito或者上面的脚本)

import numpy as np
import time


def timer(func):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        print(f"Time elapsed: {end_time - start_time:.4f} seconds")
        return result

    return wrapper

@timer
def read_lmp_trj(file_name: str):
    f = open(file_name, 'r')

    """data segment"""
    n_atoms_list = []
    lat = []
    pos = []
    num1 = 0
#跳过文件的行数,需要修改 
    num_lines_to_skip = 0
    current_line_num = 0

    """temp"""
    # n_atoms = None

    while True:
        line = f.readline()

        if not line:
            break
        
        current_line_num += 1 
        if current_line_num <= num_lines_to_skip:
            continue
        
        if "NUMBER OF ATOMS" in line:
            line = f.readline()
            n_atoms = int(line)
            n_atoms_list.append(n_atoms)

            f.readline()

            """lat"""
            for i in range(3):
                lines = f.readline()
                lat.append(lines)

            f.readline()

            """pos"""
            for i in range(n_atoms):
                pos.append(f.readline())
#每隔1000个时间间隔读取一次 需要改
            num1 +=1

    """post treatment"""
    #固定一个三维数组的两个维度,第三个维度也固定了,numpy的逻辑应该是3-2-1,第三个维度先排布,依次向前,第一个是深度
    lat = np.array([i.split() for i in lat]).astype(float).reshape(-1, 3, 3)
#需要看这里的8需不需要改
    pos = np.array([i.split() for i in pos]).astype(float).reshape(len(lat), -1, 8)
  #  idx = pos[:, :, 0].argsort(1)
    #lexsort的顺序是从最后一个传入的参数开始,依次到第一个传入的参数
    idx =np.lexsort((pos[:,:,0],pos[:,:,1]))
    sort_pos = pos[np.arange(len(pos))[:, None], idx, :]
    start3 = sort_pos[:,:,0:2].reshape(-1,2)
    n_atoms_list = np.array(n_atoms_list)

    return n_atoms_list, lat, sort_pos[:, :, [2, 3, 4]],start3



if __name__ == '__main__':
    file_name = "A.lammpstrj"

    print("Loading lmp trajectory")
    n_atoms, _, x,stc = read_lmp_trj(file_name=file_name)
    #np.savetxt('first_element.txt', x[0])
    #np.savetxt('end_element.txt',x[-1])
#30这里需要修改,是移动的原子个数


    with open('XDATCAR', 'w') as f:
        for i, element in enumerate(x):
            f.write(f'Direct configuration = {i + 1}\n')  # 注意这里的 i + 1
            np.savetxt(f, element, fmt='%.10f', delimiter='   ')

转载请注明来源 有问题可通过github提交issue