python常用的读取文件操作

python读取和保存csv、pkl、json、numpy等文件

加载和保存CSV文件

加载csv文件返回的是一个List[OrderedDict]（Python 3.8及以上版本返回的是普通的dict），每一行是一个OrderedDict，用法和普通的dict一样，并且保持了原来的列顺序。

import csv
from typing import List, Any, Dict

# Type alias used in the annotations below: file paths are passed as plain strings.
Path = str

def load_csv(file_path: Path, replace_split: str = '') -> List[Dict[str, Any]]:
    """Read a CSV/TSV file into a list of row dictionaries.

    The first line of the file is used as the header. The file is decoded as
    ``utf-8-sig``, so a leading UTF-8 byte-order mark is stripped.

    Args:
        file_path: Path of the file to read. If the path contains ``.tsv``
            anywhere, the tab-delimited ``excel-tab`` dialect is used;
            otherwise the comma-delimited ``excel`` dialect is used.
        replace_split: Despite its name this is a column name, not a
            delimiter. When non-empty, all whitespace is removed from that
            column's value in every row.

    Returns:
        One dict per data row, keyed by the header names.

    Raises:
        KeyError: If ``replace_split`` is given, the file has data rows, and
            no column with that name exists.
    """
    dialect = 'excel-tab' if '.tsv' in file_path else 'excel'
    # newline='' leaves line-ending handling to the csv module, so a quoted
    # field containing "\r\n" comes back unchanged (save_csv writes the same way).
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as file:
        rows = list(csv.DictReader(file, dialect=dialect))
    if replace_split:
        for row in rows:
            value = row[replace_split]
            # DictReader pads short rows with None; there is nothing to clean then.
            if value is not None:
                row[replace_split] = ''.join(value.split())
    return rows


def save_csv(data: List[Dict], file_path: Path, write_head: bool = True) -> None:
    """Write a list of row dictionaries to a CSV/TSV file.

    The column order is taken from the keys of the first row. The file is
    written as UTF-8 and any existing file at ``file_path`` is overwritten.

    Args:
        data: Rows to write, one dict per row. An empty list produces an
            empty file.
        file_path: Destination path. If it contains ``.tsv`` anywhere, the
            tab-delimited ``excel-tab`` dialect is used; otherwise the
            comma-delimited ``excel`` dialect is used.
        write_head: When true, write the column names as the first line.

    Raises:
        ValueError: If a later row has a key that the first row does not
            (raised by ``csv.DictWriter``).
    """
    dialect = 'excel-tab' if '.tsv' in file_path else 'excel'
    # newline='' stops the text layer from translating the line terminator
    # that the csv writer emits itself.
    with open(file_path, 'w', encoding='utf-8', newline='') as file:
        if not data:
            # No first row to take the header from: leave the file empty
            # rather than failing on data[0].
            return
        writer = csv.DictWriter(file, fieldnames=list(data[0]), dialect=dialect)
        if write_head:
            writer.writeheader()
        writer.writerows(data)

加载和保存pkl文件

pkl文件是python的序列化文件，内存里的一些对象可以保存到pkl文件里面。

import pickle
from typing import Any

# Type alias used in the annotations below: file paths are passed as plain strings.
Path = str


def load_pkl(file_path: Path) -> Any:
    """Deserialize and return the object stored in a pickle file.

    Args:
        file_path: Path of the pickle file, opened in binary mode.

    Returns:
        The object reconstructed by ``pickle.load``.

    Note:
        Unpickling can execute arbitrary code, so only load files that come
        from a trusted source.
    """
    with open(file_path, 'rb') as stream:
        return pickle.load(stream)


def save_pkl(data: Any, file_path: Path) -> None:
    """Serialize ``data`` with pickle and write it to ``file_path``.

    Args:
        data: Any picklable object.
        file_path: Destination path; an existing file is overwritten.
    """
    with open(file_path, 'wb') as stream:
        pickle.dump(data, stream)

加载和保存numpy文件

import numpy as np

# Type alias used in the annotations below: file paths are passed as plain strings.
Path = str


def save_numpy(data, file_path: Path):
    """Save ``data`` to disk in NumPy's binary ``.npy`` format.

    Args:
        data: Array (or array-like) handed to ``np.save``.
        file_path: Destination path; it is converted with ``str()`` first.
            ``np.save`` appends ``.npy`` when the name does not already end
            with it.
    """
    target = str(file_path)
    np.save(target, data)


def load_numpy(file_path: Path):
    """Load and return the data stored in a NumPy ``.npy`` file.

    Args:
        file_path: Path of the file to load; it is converted with ``str()``
            first. If no file exists at that exact path and the name does not
            end in ``.npy``, the same name with ``.npy`` appended is tried,
            because ``np.save`` adds that suffix when saving under a bare name.

    Returns:
        Whatever ``np.load`` returns for the file.

    Raises:
        FileNotFoundError: If neither candidate path exists.
    """
    import os  # local import so this block does not depend on a file-level import

    target = str(file_path)
    if not os.path.exists(target) and not target.endswith('.npy'):
        target += '.npy'
    # The original dropped the result of np.load and always returned None.
    return np.load(target)

加载和保存json文件

import json

# Type alias used in the annotations below: file paths are passed as plain strings.
Path = str


def save_json(data: dict, file_path: Path):
    """Serialize ``data`` as a single JSON document and write it to ``file_path``.

    ``json.dump`` is called with its defaults, so non-ASCII characters are
    written as ``\\uXXXX`` escapes and the output is on one line.

    Args:
        data: JSON-serializable object to write.
        file_path: Destination path; it is converted with ``str()`` first and
            an existing file is overwritten.
    """
    target = str(file_path)
    with open(target, 'w') as out:
        json.dump(data, out)


def load_json(file_path: Path):
    """Parse the JSON document stored at ``file_path`` and return it.

    Args:
        file_path: Path of the JSON file; it is converted with ``str()`` first.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: without an encoding, open() uses the
    # platform default, which breaks on non-ASCII text where that default
    # is not UTF-8.
    with open(str(file_path), 'r', encoding='utf-8') as f:
        return json.load(f)


def json_to_text(file_path: Path, data):
    """Write each item of ``data`` as one JSON document per line.

    Items are serialized with ``ensure_ascii=False``, so non-ASCII characters
    are written verbatim rather than as ``\\uXXXX`` escapes.

    Args:
        file_path: Destination path; it is converted with ``str()`` first and
            an existing file is overwritten. Note that the path comes first
            here, unlike ``save_json``.
        data: Iterable of JSON-serializable items.
    """
    # The output contains raw non-ASCII text, so the encoding must be fixed:
    # the platform default may be unable to encode it.
    with open(str(file_path), 'w', encoding='utf-8') as fw:
        for item in data:
            fw.write(json.dumps(item, ensure_ascii=False) + '\n')

自然语言处理的技巧

python常用的读取文件程序

python常用的读取文件操作

加载CSV文件和读取csv文件

加载pkl文件和读取pkl文件

加载numpy文件和读取numpy文件

加载json文件读取json文件