python常用的读取文件程序

python常用的读取文件操作

python读取和保存csv、pkl、json、numpy等文件

读取和保存csv文件

加载csv文件返回的是一个list[OrderedDict],每一行是一个OrderedDict,用法和普通的dict一样,不过保持了原来的列顺序(从Python 3.8开始,csv.DictReader直接返回普通的dict,同样保持列顺序)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import csv
from typing import List, Any, Dict

# Alias used in the annotations below: file paths are passed as plain strings.
Path = str

def load_csv(file_path: Path, replace_split: str = '') -> List[Dict[str, Any]]:
    """Read a CSV or TSV file and return its rows as a list of dicts.

    The first line of the file is used as the header. A path containing
    ``.tsv`` is parsed with the tab-separated ``excel-tab`` dialect,
    anything else with the comma-separated ``excel`` dialect. The file is
    decoded as ``utf-8-sig`` so a leading byte-order mark is dropped.

    Args:
        file_path: Location of the file to read.
        replace_split: Optional column name. When given, every whitespace
            character is removed from that column's value in each row.
            Despite its name, this is a column name and not a separator.

    Returns:
        One dict per data row, keyed by the header names.

    Raises:
        KeyError: If ``replace_split`` is not one of the columns.
    """
    dialect = 'excel-tab' if '.tsv' in str(file_path) else 'excel'
    # newline='' lets the csv module do its own line handling, so line breaks
    # embedded in quoted fields are preserved instead of being translated.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as file:
        rows = list(csv.DictReader(file, dialect=dialect))

    if replace_split:
        for row in rows:
            value = row[replace_split]
            # Rows shorter than the header carry None for the missing
            # columns; there is nothing to strip in that case.
            if value is not None:
                row[replace_split] = ''.join(value.split())
    return rows


def save_csv(data: List[Dict], file_path: Path, write_head: bool = True) -> None:
    """Write a list of row dicts to a CSV or TSV file.

    The column order is taken from the keys of the first row. A path
    containing ``.tsv`` is written with the tab-separated ``excel-tab``
    dialect, anything else with the comma-separated ``excel`` dialect.
    The file is encoded as UTF-8.

    Args:
        data: Rows to write; every row must use the keys of the first row.
        file_path: Destination file. It is created or overwritten.
        write_head: Whether to emit the header line before the rows.

    Returns:
        None. An empty ``data`` list produces an empty file.
    """
    dialect = 'excel-tab' if '.tsv' in str(file_path) else 'excel'
    # Resolve the header before opening the file, so a malformed first row
    # fails without truncating an existing file.
    fieldnames = list(data[0].keys()) if data else []
    with open(file_path, 'w', encoding='utf-8', newline='') as file:
        if not data:
            # No rows means no known columns: leave the file empty.
            return
        writer = csv.DictWriter(file, fieldnames=fieldnames, dialect=dialect)
        if write_head:
            writer.writeheader()
        writer.writerows(data)

读取和保存pkl文件

pkl文件是Python的序列化文件,内存里的对象可以保存到pkl文件里面,之后再原样加载回来。注意:不要加载来源不可信的pkl文件,反序列化时可能执行任意代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import pickle
from typing import Any

# Alias used in the annotations below: file paths are passed as plain strings.
Path = str


def load_pkl(file_path: Path) -> Any:
    """Load and return the object stored in a pickle file.

    Args:
        file_path: Location of the pickle file to read.

    Returns:
        The object that was pickled into the file.

    Note:
        Unpickling can execute arbitrary code. Only load files that come
        from a trusted source.
    """
    with open(file_path, 'rb') as f:
        return pickle.load(f)


def save_pkl(data: Any, file_path: Path) -> None:
    """Serialize an object to a pickle file.

    Args:
        data: Any picklable object.
        file_path: Destination file. It is created or overwritten.
    """
    with open(file_path, 'wb') as f:
        pickle.dump(data, f)

读取和保存numpy文件

1
2
3
4
5
6
7
8
9
10
11
import numpy as np

# Alias used in the annotations below: file paths are passed as plain strings.
Path = str


def save_numpy(data, file_path: Path):
    """Save an array to a binary ``.npy`` file.

    Args:
        data: Array (or array-like) to store.
        file_path: Destination path. ``np.save`` appends ``.npy`` when the
            path does not already end with it, so the file on disk may be
            ``file_path + '.npy'``.
    """
    np.save(str(file_path), data)


def load_numpy(file_path: Path):
    """Load and return the array stored in a ``.npy`` file.

    Args:
        file_path: Exact path of the file to read, including the ``.npy``
            suffix (``np.load`` does not add it).

    Returns:
        The array read from the file.
    """
    # The loaded array must be returned; discarding it made every call
    # evaluate to None.
    return np.load(str(file_path))

读取和保存json文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import json

# Alias used in the annotations below: file paths are passed as plain strings.
Path = str


def save_json(data: dict, file_path: Path):
    """Serialize ``data`` as a single JSON document and write it to a file.

    Non-ASCII characters are written as ``\\uXXXX`` escapes (the
    ``json.dump`` default), and the file is encoded as UTF-8.

    Args:
        data: JSON-serializable object to store.
        file_path: Destination file. It is created or overwritten.
    """
    # Pin the encoding so the output does not depend on the platform locale.
    with open(str(file_path), 'w', encoding='utf-8') as f:
        json.dump(data, f)


def load_json(file_path: Path):
    """Parse a JSON file and return the decoded object.

    The file is read as UTF-8. A leading byte-order mark, if present, is
    skipped.

    Args:
        file_path: Location of the JSON file to read.

    Returns:
        The Python object decoded from the file's JSON document.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged and drops a BOM, which
    # json.load would otherwise reject.
    with open(str(file_path), 'r', encoding='utf-8-sig') as f:
        return json.load(f)


def json_to_text(file_path: Path, data):
    """Write each item of ``data`` as one JSON document per line.

    Non-ASCII characters are written as-is (``ensure_ascii=False``), and the
    file is encoded as UTF-8.

    Args:
        file_path: Destination file. It is created or overwritten.
        data: Iterable of JSON-serializable items; each becomes one line.
    """
    # The output contains raw non-ASCII text, so the encoding must be pinned:
    # a non-UTF-8 locale default could fail or write an unreadable file.
    with open(str(file_path), 'w', encoding='utf-8') as fw:
        for item in data:
            fw.write(json.dumps(item, ensure_ascii=False) + '\n')