implement overwriting headers
This commit is contained in:
parent
8949578994
commit
f5db18cd59
16
README.md
16
README.md
|
|
@ -27,8 +27,20 @@ Usage
|
||||||
from pandas_ods_reader import read_ods
|
from pandas_ods_reader import read_ods
|
||||||
|
|
||||||
path = "path/to/file.ods"
|
path = "path/to/file.ods"
|
||||||
|
|
||||||
|
# load a sheet based on its index (1-based)
|
||||||
sheet_idx = 1
|
sheet_idx = 1
|
||||||
df1 = read_ods(path, sheet_idx)
|
df = read_ods(path, sheet_idx)
|
||||||
|
|
||||||
|
# load a sheet based on its name
|
||||||
sheet_name = "sheet1"
|
sheet_name = "sheet1"
|
||||||
df2 = read_ods(path, sheet_name)
|
df = read_ods(path, sheet_name)
|
||||||
|
|
||||||
|
# load a file that does not contain a header row
|
||||||
|
# if no columns are provided, they are named column_0, column_1, ...
|
||||||
|
df = read_ods(path, 1, headers=False)
|
||||||
|
|
||||||
|
# load a file and provide custom column names
|
||||||
|
# if headers is True (the default), the file's header row is skipped and the given column names are used instead
|
||||||
|
df = read_ods(path, 1, columns=["A", "B", "C"])
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -19,21 +19,22 @@ def load_ods(doc, sheet, headers=True, columns=None):
|
||||||
col_index = {}
|
col_index = {}
|
||||||
for i, row in enumerate(sheet.rows()):
|
for i, row in enumerate(sheet.rows()):
|
||||||
# row is a list of cells
|
# row is a list of cells
|
||||||
if headers and i == 0:
|
if headers and i == 0 and not columns:
|
||||||
# columns as lists in a dictionary
|
# columns as lists in a dictionary
|
||||||
df_dict = {cell.value: [] for cell in row if cell.value}
|
df_dict = {cell.value: [] for cell in row if cell.value}
|
||||||
# create index for the column headers
|
# create index for the column headers
|
||||||
col_index = {
|
col_index = {
|
||||||
j: cell.value for j, cell in enumerate(row) if cell.value}
|
j: cell.value for j, cell in enumerate(row) if cell.value}
|
||||||
continue
|
continue
|
||||||
elif not headers and i == 0:
|
elif i == 0:
|
||||||
columns = columns if columns else (
|
columns = columns if columns else (
|
||||||
["column_%s" % j for j in range(len(row))])
|
["column_%s" % j for j in range(len(row))])
|
||||||
# columns as lists in a dictionary
|
# columns as lists in a dictionary
|
||||||
df_dict = {column: [] for column in columns}
|
df_dict = {column: [] for column in columns}
|
||||||
# create index for the column headers
|
# create index for the column headers
|
||||||
col_index = {j: column for j, column in enumerate(columns)}
|
col_index = {j: column for j, column in enumerate(columns)}
|
||||||
continue
|
if headers:
|
||||||
|
continue
|
||||||
for j, cell in enumerate(row):
|
for j, cell in enumerate(row):
|
||||||
if j < len(col_index):
|
if j < len(col_index):
|
||||||
# use header instead of column index
|
# use header instead of column index
|
||||||
|
|
|
||||||
|
|
@ -8,30 +8,40 @@ from pandas_ods_reader import read_ods
|
||||||
root = os.path.dirname(os.path.abspath(__file__))
|
root = os.path.dirname(os.path.abspath(__file__))
|
||||||
rsc = os.path.join(root, "rsc")
|
rsc = os.path.join(root, "rsc")
|
||||||
|
|
||||||
|
header_file = "example_headers.ods"
|
||||||
|
no_header_file = "example_no_headers.ods"
|
||||||
|
|
||||||
|
|
||||||
class TestOdsReader(object):
|
class TestOdsReader(object):
|
||||||
def test_header_file_with_int(self):
|
def test_header_file_with_int(self):
|
||||||
example = "example_headers.ods"
|
path = os.path.join(rsc, header_file)
|
||||||
path = os.path.join(rsc, example)
|
|
||||||
df = read_ods(path, 1)
|
df = read_ods(path, 1)
|
||||||
assert isinstance(df, pd.DataFrame)
|
assert isinstance(df, pd.DataFrame)
|
||||||
|
assert len(df) == 10
|
||||||
|
|
||||||
def test_header_file_with_str(self):
|
def test_header_file_with_str(self):
|
||||||
example = "example_headers.ods"
|
path = os.path.join(rsc, header_file)
|
||||||
path = os.path.join(rsc, example)
|
|
||||||
df = read_ods(path, "Sheet1")
|
df = read_ods(path, "Sheet1")
|
||||||
assert isinstance(df, pd.DataFrame)
|
assert isinstance(df, pd.DataFrame)
|
||||||
|
assert len(df) == 10
|
||||||
|
|
||||||
|
def test_header_file_with_cols(self):
|
||||||
|
path = os.path.join(rsc, header_file)
|
||||||
|
columns = ["One", "Two", "Three", "Four", "Five"]
|
||||||
|
df = read_ods(path, "Sheet1", columns=columns)
|
||||||
|
assert list(df.columns) == columns
|
||||||
|
assert len(df) == 10
|
||||||
|
|
||||||
def test_no_header_file_no_cols(self):
|
def test_no_header_file_no_cols(self):
|
||||||
example = "example_no_headers.ods"
|
path = os.path.join(rsc, no_header_file)
|
||||||
path = os.path.join(rsc, example)
|
|
||||||
df = read_ods(path, 1, headers=False)
|
df = read_ods(path, 1, headers=False)
|
||||||
assert list(df.columns) == [
|
assert list(df.columns) == [
|
||||||
"column_%s" % i for i in range(len(df.columns))]
|
"column_%s" % i for i in range(len(df.columns))]
|
||||||
|
assert len(df) == 10
|
||||||
|
|
||||||
def test_no_header_file_with_cols(self):
|
def test_no_header_file_with_cols(self):
|
||||||
example = "example_headers.ods"
|
path = os.path.join(rsc, no_header_file)
|
||||||
path = os.path.join(rsc, example)
|
|
||||||
columns = ["A", "B", "C", "D", "E"]
|
columns = ["A", "B", "C", "D", "E"]
|
||||||
df = read_ods(path, 1, headers=False, columns=columns)
|
df = read_ods(path, 1, headers=False, columns=columns)
|
||||||
assert list(df.columns) == columns
|
assert list(df.columns) == columns
|
||||||
|
assert len(df) == 10
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue