implement overwriting headers

This commit is contained in:
iuvbio 2019-01-31 23:37:53 +01:00
parent 8949578994
commit f5db18cd59
4 changed files with 37 additions and 14 deletions

View File

@ -27,8 +27,20 @@ Usage
from pandas_ods_reader import read_ods from pandas_ods_reader import read_ods
path = "path/to/file.ods" path = "path/to/file.ods"
# load a sheet based on its index (1-based)
sheet_idx = 1 sheet_idx = 1
df1 = read_ods(path, sheet_idx) df = read_ods(path, sheet_idx)
# load a sheet based on its name
sheet_name = "sheet1" sheet_name = "sheet1"
df2 = read_ods(path, sheet_name) df = read_ods(path, sheet_name)
# load a file that does not contain a header row
# if no columns are provided, they will be numbered (column_0, column_1, ...)
df = read_ods(path, 1, headers=False)
# load a file and provide custom column names
# if headers is True (the default), the header row is skipped and replaced by these names
df = read_ods(path, 1, columns=["A", "B", "C"])
``` ```

View File

@ -19,21 +19,22 @@ def load_ods(doc, sheet, headers=True, columns=None):
col_index = {} col_index = {}
for i, row in enumerate(sheet.rows()): for i, row in enumerate(sheet.rows()):
# row is a list of cells # row is a list of cells
if headers and i == 0: if headers and i == 0 and not columns:
# columns as lists in a dictionary # columns as lists in a dictionary
df_dict = {cell.value: [] for cell in row if cell.value} df_dict = {cell.value: [] for cell in row if cell.value}
# create index for the column headers # create index for the column headers
col_index = { col_index = {
j: cell.value for j, cell in enumerate(row) if cell.value} j: cell.value for j, cell in enumerate(row) if cell.value}
continue continue
elif not headers and i == 0: elif i == 0:
columns = columns if columns else ( columns = columns if columns else (
["column_%s" % j for j in range(len(row))]) ["column_%s" % j for j in range(len(row))])
# columns as lists in a dictionary # columns as lists in a dictionary
df_dict = {column: [] for column in columns} df_dict = {column: [] for column in columns}
# create index for the column headers # create index for the column headers
col_index = {j: column for j, column in enumerate(columns)} col_index = {j: column for j, column in enumerate(columns)}
continue if headers:
continue
for j, cell in enumerate(row): for j, cell in enumerate(row):
if j < len(col_index): if j < len(col_index):
# use header instead of column index # use header instead of column index

View File

@ -8,30 +8,40 @@ from pandas_ods_reader import read_ods
root = os.path.dirname(os.path.abspath(__file__)) root = os.path.dirname(os.path.abspath(__file__))
rsc = os.path.join(root, "rsc") rsc = os.path.join(root, "rsc")
header_file = "example_headers.ods"
no_header_file = "example_no_headers.ods"
class TestOdsReader(object): class TestOdsReader(object):
def test_header_file_with_int(self): def test_header_file_with_int(self):
example = "example_headers.ods" path = os.path.join(rsc, header_file)
path = os.path.join(rsc, example)
df = read_ods(path, 1) df = read_ods(path, 1)
assert isinstance(df, pd.DataFrame) assert isinstance(df, pd.DataFrame)
assert len(df) == 10
def test_header_file_with_str(self): def test_header_file_with_str(self):
example = "example_headers.ods" path = os.path.join(rsc, header_file)
path = os.path.join(rsc, example)
df = read_ods(path, "Sheet1") df = read_ods(path, "Sheet1")
assert isinstance(df, pd.DataFrame) assert isinstance(df, pd.DataFrame)
assert len(df) == 10
def test_header_file_with_cols(self):
path = os.path.join(rsc, header_file)
columns = ["One", "Two", "Three", "Four", "Five"]
df = read_ods(path, "Sheet1", columns=columns)
assert list(df.columns) == columns
assert len(df) == 10
def test_no_header_file_no_cols(self): def test_no_header_file_no_cols(self):
example = "example_no_headers.ods" path = os.path.join(rsc, no_header_file)
path = os.path.join(rsc, example)
df = read_ods(path, 1, headers=False) df = read_ods(path, 1, headers=False)
assert list(df.columns) == [ assert list(df.columns) == [
"column_%s" % i for i in range(len(df.columns))] "column_%s" % i for i in range(len(df.columns))]
assert len(df) == 10
def test_no_header_file_with_cols(self): def test_no_header_file_with_cols(self):
example = "example_headers.ods" path = os.path.join(rsc, no_header_file)
path = os.path.join(rsc, example)
columns = ["A", "B", "C", "D", "E"] columns = ["A", "B", "C", "D", "E"]
df = read_ods(path, 1, headers=False, columns=columns) df = read_ods(path, 1, headers=False, columns=columns)
assert list(df.columns) == columns assert list(df.columns) == columns
assert len(df) == 10

View File

@ -1,7 +1,7 @@
from setuptools import setup, find_packages from setuptools import setup, find_packages
VERSION = "0.0.5" VERSION = "0.0.6"
with open("README.md", "r") as fh: with open("README.md", "r") as fh:
long_description = fh.read() long_description = fh.read()