diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py
index b9d4404..fe18cfe 100644
--- a/pandas_ods_reader/parser.py
+++ b/pandas_ods_reader/parser.py
@@ -1,28 +1,14 @@
-"""Imports an ods file into a DataFrame object"""
-import ezodf
+"""Imports an ods or fods file into a DataFrame object"""
+from pathlib import Path
-from .parsers import ods
-from .tools import sanitize_df
+from .parsers import fods, ods
+
+
+# Dispatch table: file suffix -> parser module. ``read_ods`` calls
+# ``read(file_or_path, sheet, headers=..., columns=...)`` on the selected module.
+EXT_MAP = {".ods": ods, ".fods": fods}
def read_ods(file_or_path, sheet=1, headers=True, columns=None):
- """
- This function reads in the provided ods file and converts it to a
- dictionary. The dictionary is converted to a DataFrame. Trailing empty rows
- and columns are dropped from the DataFrame, before it is returned.
-
- :param file_or_path: str
- the path to the ODS file
- :param sheet: int or str, default 1
- if int, the 1 based index of the sheet to be read in. If str, the name of
- the sheet to be read in
- :param header: bool, default True
- if True, the first row is read in as headers
- :param columns: list, default None
- a list of column names to be used as headers
- :returns: pandas.DataFrame
- the ODS file as a pandas DataFrame
- """
- doc = ezodf.opendoc(file_or_path)
- df = ods.load_ods(doc, sheet, headers, columns)
- return sanitize_df(df)
+    """
+    Read an ods or fods file into a pandas DataFrame.
+
+    The parser module is looked up in ``EXT_MAP`` by the file suffix, which is
+    matched case-insensitively, and its ``read`` function does the work.
+
+    :param file_or_path: str or path-like
+        the path to the ODS or FODS file
+    :param sheet: int or str, default 1
+        if int, the 1 based index of the sheet to be read in. If str, the name
+        of the sheet to be read in
+    :param headers: bool, default True
+        if True, the first row is read in as headers
+    :param columns: list, default None
+        a list of column names to be used as headers
+    :returns: pandas.DataFrame
+        the file content as returned by the selected parser's ``read``
+    :raises ValueError: if the suffix has no entry in ``EXT_MAP``
+    """
+    # Lower-case the suffix so "data.ODS" resolves like "data.ods".
+    loader = EXT_MAP.get(Path(file_or_path).suffix.lower())
+    if loader is None:
+        raise ValueError("Unknown filetype.")
+    return loader.read(file_or_path, sheet, headers=headers, columns=columns)
diff --git a/pandas_ods_reader/parsers/fods.py b/pandas_ods_reader/parsers/fods.py
new file mode 100644
index 0000000..3f747b9
--- /dev/null
+++ b/pandas_ods_reader/parsers/fods.py
@@ -0,0 +1,115 @@
+from collections import defaultdict
+
+from lxml import etree
+import pandas as pd
+
+from ..tools import sanitize_df
+
+
+# Element names written with namespace prefixes; the prefixes are resolved
+# through the ``namespaces`` mapping passed to ``find``/``findall``.
+BODY_TAG = "office:body"
+SPREADSHEET_TAG = "office:spreadsheet"
+# Keys into an element's ``nsmap``, used to build ``{namespace}attribute`` names.
+OFFICE_KEY = "office"
+TABLE_KEY = "table"
+TABLE_TAG = "table:table"
+TABLE_ROW_TAG = "table:table-row"
+TABLE_CELL_TAG = "table:table-cell"
+TABLE_CELL_TEXT_TAG = "text:p"
+# Attribute local names (no namespace); combined with the ``nsmap`` keys above.
+TABLE_CELL_REPEATED_ATTRIB = "number-columns-repeated"
+VALUE_TYPE_ATTRIB = "value-type"
+
+
+def get_sheet(spreadsheet, sheet_id):
+    """
+    Return the ``table:table`` element selected by `sheet_id`.
+
+    :param spreadsheet: element
+        the ``office:spreadsheet`` element whose direct ``table:table``
+        children are searched
+    :param sheet_id: int or str
+        if str, the ``table:name`` of the sheet; if int, its 1 based index
+    :returns: the matching ``table:table`` element (the first one when
+        several tables share a name)
+    :raises KeyError: if no table carries the given name
+    :raises IndexError: if the index is below 1 or above the number of tables
+    """
+    namespaces = spreadsheet.nsmap
+    tables = spreadsheet.findall(TABLE_TAG, namespaces=namespaces)
+    if isinstance(sheet_id, str):
+        # Compare the attribute value in Python rather than interpolating the
+        # name into a path predicate: a name containing a quote character
+        # would otherwise corrupt the expression.
+        name_attrib = f"{{{namespaces[TABLE_KEY]}}}name"
+        for table in tables:
+            if table.attrib.get(name_attrib) == sheet_id:
+                return table
+        raise KeyError(f"There is no sheet named {sheet_id}.")
+    # Indices are 1 based. Reject 0 and negative values explicitly so they
+    # cannot wrap around through Python's negative list indexing.
+    if sheet_id < 1 or sheet_id > len(tables):
+        raise IndexError(f"There is no sheet at index {sheet_id}.")
+    return tables[sheet_id - 1]
+
+
+def parse_columns(cells, headers=True, columns=None):
+    """
+    Work out the column names and separate the header row from the data rows.
+
+    :param cells: list
+        one list of table-cell elements per row; when `headers` is True the
+        first row is removed from this list in place
+    :param headers: bool, default True
+        if True, the first row is treated as the header row
+    :param columns: list, default None
+        explicit column names; when given they are returned unchanged
+    :returns: tuple ``(columns, cells)`` with the column names and the
+        remaining rows; an empty sheet yields ``([], [])`` when no explicit
+        names are given
+    """
+    # An empty sheet has no header row to pop.
+    orig_columns = cells.pop(0) if headers and cells else None
+    if columns is not None:
+        return columns, cells
+    if not orig_columns:
+        # No usable header row: number the columns after the first remaining
+        # row, or produce no columns at all when there are no rows.
+        width = len(cells[0]) if cells else 0
+        return [f"column.{i}" for i in range(width)], cells
+    repeated_val = None
+    columns = []
+    repeated_dict = defaultdict(int)
+    for i, col in enumerate(orig_columns):
+        text = col.find(TABLE_CELL_TEXT_TAG, namespaces=col.nsmap)
+        if text is not None:
+            value = text.text
+        elif repeated_val:
+            # Header cell without text: reuse the name derived from the most
+            # recent cell that carried a repeat attribute.
+            value = repeated_val
+        else:
+            # Header cell without text and nothing to reuse: take the first
+            # "unnamed.N" that is not in use yet.
+            value = "unnamed"
+            idx = 1
+            while f"{value}.{idx}" in columns:
+                idx += 1
+            value = f"{value}.{idx}"
+        repeated = col.attrib.get(
+            f"{{{col.nsmap[TABLE_KEY]}}}{TABLE_CELL_REPEATED_ATTRIB}"
+        )
+        if repeated:
+            repeated_dict[value] += 1
+            repeated_val = f"{value}.{repeated_dict[value]}"
+        # A name that is already taken is disambiguated with the cell position.
+        column = value if value not in columns else f"{value}.{i}"
+        columns.append(column)
+    return columns, cells
+
+
+def parse_value(cell):
+    """
+    Convert one table-cell element to a Python value.
+
+    :param cell: element
+        a ``table:table-cell`` element
+    :returns: ``float`` for cells whose ``office:value-type`` is ``"float"``,
+        the text of the cell's ``text:p`` child for any other cell, or None
+        when the cell has no ``text:p`` child and no stored float value
+    """
+    office_ns = cell.nsmap[OFFICE_KEY]
+    is_float = cell.attrib.get(f"{{{office_ns}}}{VALUE_TYPE_ATTRIB}") == "float"
+    if is_float:
+        # Prefer the stored number over the display text: the text in
+        # ``text:p`` follows the cell's number format and may be rounded or
+        # use separators that ``float`` cannot parse.
+        stored = cell.attrib.get(f"{{{office_ns}}}value")
+        if stored is not None:
+            return float(stored)
+    text = cell.find(TABLE_CELL_TEXT_TAG, namespaces=cell.nsmap)
+    if text is None:
+        return None
+    value = text.text
+    # Fall back to the display text only when there is something to convert.
+    if is_float and value is not None:
+        return float(value)
+    return value
+
+
+def _expand_repeated(cells, width):
+    """Unroll ``number-columns-repeated`` runs of one row, keeping at most `width` cells."""
+    expanded = []
+    for cell in cells:
+        remaining = width - len(expanded)
+        if remaining <= 0:
+            break
+        repeated = cell.attrib.get(
+            f"{{{cell.nsmap[TABLE_KEY]}}}{TABLE_CELL_REPEATED_ATTRIB}"
+        )
+        count = int(repeated) if repeated else 1
+        # The same element is referenced several times; it is only read
+        # afterwards. Capping at `remaining` keeps long filler runs cheap.
+        expanded.extend([cell] * min(count, remaining))
+    return expanded
+
+
+def load_fods(doc, sheet_id, headers=True, columns=None):
+    """
+    Build a DataFrame from one sheet of a parsed flat ODS document.
+
+    :param doc: element tree
+        the parsed document; its ``office:body``/``office:spreadsheet``
+        element is searched for the sheet
+    :param sheet_id: int or str
+        if int, the 1 based index of the sheet; if str, the name of the sheet
+    :param headers: bool, default True
+        if True, the first row is used for the column names
+    :param columns: list, default None
+        a list of column names to be used as headers
+    :returns: pandas.DataFrame with one row per remaining ``table:table-row``;
+        rows shorter than the column list are padded with None and longer
+        rows are cut to the number of columns
+    :raises ValueError: if `sheet_id` is neither ``str`` nor ``int``
+    :raises KeyError: if there is no sheet with the given name
+    :raises IndexError: if there is no sheet at the given index
+
+    Column repetition (``table:number-columns-repeated``) is expanded for data
+    cells so values stay under their columns; row repetition
+    (``table:number-rows-repeated``) is not expanded here.
+    """
+    if not isinstance(sheet_id, (str, int)):
+        raise ValueError("Sheet id has to be either `str` or `int`")
+    root = doc.getroot()
+    namespaces = root.nsmap
+    spreadsheet = doc.find(BODY_TAG, namespaces=namespaces).find(
+        SPREADSHEET_TAG, namespaces=namespaces
+    )
+    sheet = get_sheet(spreadsheet, sheet_id)
+    allcells = [
+        row.findall(TABLE_CELL_TAG, namespaces=namespaces)
+        for row in sheet.findall(TABLE_ROW_TAG, namespaces=namespaces)
+    ]
+    # The header row is handed over unexpanded: parse_columns reads the repeat
+    # attribute of header cells itself.
+    columns, values = parse_columns(allcells, headers, columns)
+    width = len(columns)
+    final_rows = []
+    for row in values:
+        rowvalues = [parse_value(cell) for cell in _expand_repeated(row, width)]
+        # Pad short rows so every row matches the column list.
+        rowvalues.extend([None] * (width - len(rowvalues)))
+        final_rows.append(rowvalues)
+    return pd.DataFrame(final_rows, columns=columns)
+
+
+def read(file_or_path, sheet=1, headers=True, columns=None):
+    """
+    Read one sheet of a flat ODS (``.fods``) file into a DataFrame.
+
+    The path is converted with ``str`` before it is handed to
+    ``etree.parse``; the loaded sheet is passed through ``sanitize_df``.
+
+    :param file_or_path: str or path-like
+        the path to the FODS file
+    :param sheet: int or str, default 1
+        if int, the 1 based index of the sheet to be read in. If str, the name
+        of the sheet to be read in
+    :param headers: bool, default True
+        if True, the first row is read in as headers
+    :param columns: list, default None
+        a list of column names to be used as headers
+    :returns: the result of ``sanitize_df`` applied to the loaded DataFrame
+    """
+    tree = etree.parse(str(file_or_path))
+    return sanitize_df(load_fods(tree, sheet, headers=headers, columns=columns))
diff --git a/pandas_ods_reader/parsers/ods.py b/pandas_ods_reader/parsers/ods.py
index d92ce79..55d3a2b 100644
--- a/pandas_ods_reader/parsers/ods.py
+++ b/pandas_ods_reader/parsers/ods.py
@@ -1,7 +1,10 @@
from collections import OrderedDict
+import ezodf
import pandas as pd
+from ..tools import sanitize_df
+
def load_ods(doc, sheet_id, headers=True, columns=None):
# convert the sheet to a pandas.DataFrame
@@ -51,3 +54,26 @@ def load_ods(doc, sheet_id, headers=True, columns=None):
continue
df = pd.DataFrame(df_dict)
return df
+
+
+def read(file_or_path, sheet=1, headers=True, columns=None):
+    """
+    Read one sheet of an ods file into a pandas DataFrame.
+
+    The document is opened with ``ezodf.opendoc``, converted by ``load_ods``
+    and passed through ``sanitize_df``, so that trailing empty rows and
+    columns are dropped before the DataFrame is returned.
+
+    :param file_or_path: str
+        the path to the ODS file
+    :param sheet: int or str, default 1
+        if int, the 1 based index of the sheet to be read in. If str, the name
+        of the sheet to be read in
+    :param headers: bool, default True
+        if True, the first row is read in as headers
+    :param columns: list, default None
+        a list of column names to be used as headers
+    :returns: pandas.DataFrame
+        the ODS file as a pandas DataFrame
+    """
+    document = ezodf.opendoc(file_or_path)
+    frame = load_ods(document, sheet, headers, columns)
+    return sanitize_df(frame)
diff --git a/tests/rsc/example_col_lengths.fods b/tests/rsc/example_col_lengths.fods
new file mode 100644
index 0000000..b5084f5
--- /dev/null
+++ b/tests/rsc/example_col_lengths.fods
@@ -0,0 +1,451 @@
+
+
+
+ Lukas Jansen2019-01-27T03:31:08.9314826322019-06-06T11:51:47.467971713Lukas JansenPT2M31S2LibreOffice/7.1.5.2$Linux_X86_64 LibreOffice_project/10$Build-2
+
+
+ 0
+ 0
+ 15185
+ 4967
+
+
+ view1
+
+
+ 7
+ 14
+ 0
+ 0
+ 0
+ 0
+ 2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 100
+ 60
+ true
+ false
+
+
+ Sheet1
+ 1849
+ 0
+ 100
+ 60
+ false
+ true
+ true
+ true
+ 12632256
+ true
+ true
+ true
+ true
+ false
+ false
+ false
+ 1000
+ 1000
+ 1
+ 1
+ true
+ false
+
+
+
+
+ true
+ true
+ true
+ false
+ 1000
+ true
+ 1
+ 12632256
+ true
+ true
+ true
+ jQH+/01GQ0o0OTFEVwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQ1VQUzpNRkNKNDkxRFcAAAAAAAAAAAAAAAAAAAAAAAAWAAMArgAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9TUZDSjQ5MURXCm9yaWVudGF0aW9uPVBvcnRyYWl0CmNvcGllcz0xCmNvbGxhdGU9ZmFsc2UKbWFyZ2luZGFqdXN0bWVudD0wLDAsMCwwCmNvbG9yZGVwdGg9MjQKcHNsZXZlbD0wCnBkZmRldmljZT0xCmNvbG9yZGV2aWNlPTAKUFBEQ29udGV4RGF0YQpQYWdlU2l6ZTpBNAAAEgBDT01QQVRfRFVQTEVYX01PREUPAER1cGxleE1vZGU6Ok9mZg==
+ 1000
+ 7
+ false
+ true
+ true
+ 1
+ true
+ false
+ true
+ false
+ true
+ true
+ MFCJ491DW
+ false
+ 0
+ 3
+ true
+ false
+ false
+ false
+ true
+ false
+ true
+
+
+ Sheet1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ???
+
+
+
+ Page 1
+
+
+
+
+
+
+ ???(???)
+
+
+ 00/00/0000, 00:00:00
+
+
+
+
+ Page 1/ 99
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A
+
+
+ B
+
+
+ C
+
+
+ D
+
+
+ E
+
+
+
+
+ 2
+
+
+ 28
+
+
+ 76
+
+
+ 89
+
+
+ 60
+
+
+
+
+ 69
+
+
+ 6
+
+
+ 33
+
+
+ 7
+
+
+ 85
+
+
+
+
+ 48
+
+
+ 14
+
+
+ 48
+
+
+ 14
+
+
+ 61
+
+
+
+
+ 25
+
+
+ 9
+
+
+ 49
+
+
+ 91
+
+
+ 39
+
+
+
+
+ 62
+
+
+ 57
+
+
+ 96
+
+
+ 100
+
+
+ 28
+
+
+
+
+ 0
+
+
+ 85
+
+
+ 83
+
+
+ 50
+
+
+ 58
+
+
+
+
+ 33
+
+
+ 10
+
+
+ 56
+
+
+ 46
+
+
+ 30
+
+
+
+
+ 29
+
+
+ 99
+
+
+ 100
+
+
+ 45
+
+
+ 96
+
+
+
+
+ 62
+
+
+ 37
+
+
+ 16
+
+
+ 37
+
+
+ 51
+
+
+
+
+ 13
+
+
+ 48
+
+
+ 71
+
+
+ 5
+
+
+ 34
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/rsc/example_duplicated_column_names.fods b/tests/rsc/example_duplicated_column_names.fods
new file mode 100644
index 0000000..a8155a0
--- /dev/null
+++ b/tests/rsc/example_duplicated_column_names.fods
@@ -0,0 +1,485 @@
+
+
+
+ 2019-05-31T10:36:15.9187991642019-06-06T14:41:16.030513765PT16H2M57S244LibreOffice/7.1.5.2$Linux_X86_64 LibreOffice_project/10$Build-2
+
+
+ 0
+ 0
+ 9133
+ 1806
+
+
+ view1
+
+
+ 2
+ 6
+ 0
+ 0
+ 0
+ 0
+ 2
+ 0
+ 1
+ 0
+ 0
+ 0
+ 100
+ 60
+ true
+ false
+
+
+ Sheet1
+ 1849
+ 0
+ 100
+ 60
+ false
+ true
+ true
+ true
+ 12632256
+ true
+ true
+ true
+ true
+ false
+ false
+ false
+ 1000
+ 1000
+ 1
+ 1
+ true
+ false
+
+
+
+
+ true
+ true
+ true
+ false
+ 1000
+ true
+ 1
+ 12632256
+ true
+ true
+ true
+ jQH+/01GQ0o0OTFEVwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQ1VQUzpNRkNKNDkxRFcAAAAAAAAAAAAAAAAAAAAAAAAWAAMArgAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9TUZDSjQ5MURXCm9yaWVudGF0aW9uPVBvcnRyYWl0CmNvcGllcz0xCmNvbGxhdGU9ZmFsc2UKbWFyZ2luZGFqdXN0bWVudD0wLDAsMCwwCmNvbG9yZGVwdGg9MjQKcHNsZXZlbD0wCnBkZmRldmljZT0xCmNvbG9yZGV2aWNlPTAKUFBEQ29udGV4RGF0YQpQYWdlU2l6ZTpBNAAAEgBDT01QQVRfRFVQTEVYX01PREUPAER1cGxleE1vZGU6Ok9mZg==
+ 1000
+ 7
+ false
+ true
+ true
+ 1
+ true
+
+
+ en
+ GB
+
+
+
+
+
+ false
+ true
+ false
+ true
+ true
+ MFCJ491DW
+ false
+ 0
+ 3
+ true
+ false
+ false
+ false
+ true
+ false
+ true
+
+
+ Sheet1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+
+
+
+
+
+
+ :
+
+
+
+
+ :
+
+ :
+
+
+
+
+ :
+
+
+
+
+
+
+
+ /
+
+ /
+
+
+
+
+ -
+
+ -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ???
+
+
+
+ Page 1
+
+
+
+
+
+
+ ???(???)
+
+
+ 00/00/0000, 00:00:00
+
+
+
+
+ Page 1/ 99
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ID
+
+
+ name
+
+
+ website
+
+
+
+
+
+ Acto_1
+
+
+ W
+
+
+ sitea
+
+
+
+
+
+ Acto_2
+
+
+ D
+
+
+
+ siteb
+
+
+
+
+
+ Acto_3
+
+
+ S
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/rsc/example_headers.fods b/tests/rsc/example_headers.fods
new file mode 100644
index 0000000..250985d
--- /dev/null
+++ b/tests/rsc/example_headers.fods
@@ -0,0 +1,441 @@
+
+
+
+ Lukas Jansen2019-01-27T03:31:08.9314826322019-01-27T03:33:20.620959045Lukas JansenPT2M14S1LibreOffice/7.1.4.2$Linux_X86_64 LibreOffice_project/10$Build-2
+
+
+ 0
+ 0
+ 11288
+ 4967
+
+
+ view1
+
+
+ 6
+ 20
+ 0
+ 0
+ 0
+ 0
+ 2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 100
+ 60
+ true
+ false
+
+
+ Sheet1
+ 1849
+ 0
+ 100
+ 60
+ false
+ true
+ true
+ true
+ 12632256
+ true
+ true
+ true
+ true
+ false
+ false
+ false
+ 1000
+ 1000
+ 1
+ 1
+ true
+ false
+
+
+
+
+ true
+ true
+ true
+ false
+ 1000
+ true
+ 1
+ 12632256
+ true
+ true
+ true
+ jQH+/01GQ0o0OTFEVwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQ1VQUzpNRkNKNDkxRFcAAAAAAAAAAAAAAAAAAAAAAAAWAAMArgAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9TUZDSjQ5MURXCm9yaWVudGF0aW9uPVBvcnRyYWl0CmNvcGllcz0xCmNvbGxhdGU9ZmFsc2UKbWFyZ2luZGFqdXN0bWVudD0wLDAsMCwwCmNvbG9yZGVwdGg9MjQKcHNsZXZlbD0wCnBkZmRldmljZT0xCmNvbG9yZGV2aWNlPTAKUFBEQ29udGV4RGF0YQpQYWdlU2l6ZTpBNAAAEgBDT01QQVRfRFVQTEVYX01PREUPAER1cGxleE1vZGU6Ok9mZg==
+ 1000
+ 7
+ false
+ true
+ true
+ 1
+ true
+ false
+ true
+ false
+ true
+ true
+ MFCJ491DW
+ false
+ 0
+ 3
+ true
+ false
+ false
+ false
+ true
+ false
+ true
+
+
+ Sheet1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ???
+
+
+
+ Page 1
+
+
+
+
+
+
+ ???(???)
+
+
+ 00/00/0000, 00:00:00
+
+
+
+
+ Page 1/ 99
+
+
+
+
+
+
+
+
+
+
+
+ A
+
+
+ B
+
+
+ C
+
+
+ D
+
+
+ E
+
+
+
+
+ 62
+
+
+ 22
+
+
+ 83
+
+
+ 35
+
+
+ 35
+
+
+
+
+ 100
+
+
+ 56
+
+
+ 91
+
+
+ 29
+
+
+ 57
+
+
+
+
+ 11
+
+
+ 71
+
+
+ 68
+
+
+ 53
+
+
+ 32
+
+
+
+
+ 84
+
+
+ 26
+
+
+ 3
+
+
+ 21
+
+
+ 17
+
+
+
+
+ 11
+
+
+ 56
+
+
+ 26
+
+
+ 25
+
+
+ 30
+
+
+
+
+ 61
+
+
+ 3
+
+
+ 35
+
+
+ 98
+
+
+ 62
+
+
+
+
+ 22
+
+
+ 96
+
+
+ 10
+
+
+ 53
+
+
+ 34
+
+
+
+
+ 25
+
+
+ 33
+
+
+ 86
+
+
+ 38
+
+
+ 89
+
+
+
+
+ 25
+
+
+ 93
+
+
+ 31
+
+
+ 72
+
+
+ 60
+
+
+
+
+ 19
+
+
+ 64
+
+
+ 42
+
+
+ 38
+
+
+ 28
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/rsc/example_missing_header.fods b/tests/rsc/example_missing_header.fods
new file mode 100644
index 0000000..feb88b9
--- /dev/null
+++ b/tests/rsc/example_missing_header.fods
@@ -0,0 +1,439 @@
+
+
+
+ Lukas Jansen2019-01-27T03:31:08.9314826322019-06-08T15:24:55.731863115Lukas JansenPT4M44S2LibreOffice/7.1.5.2$Linux_X86_64 LibreOffice_project/10$Build-2
+
+
+ 0
+ 0
+ 11288
+ 4967
+
+
+ view1
+
+
+ 7
+ 9
+ 0
+ 0
+ 0
+ 0
+ 2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 100
+ 60
+ true
+ false
+
+
+ Sheet1
+ 1849
+ 0
+ 100
+ 60
+ false
+ true
+ true
+ true
+ 12632256
+ true
+ true
+ true
+ true
+ false
+ false
+ false
+ 1000
+ 1000
+ 1
+ 1
+ true
+ false
+
+
+
+
+ true
+ true
+ true
+ false
+ 1000
+ true
+ 1
+ 12632256
+ true
+ true
+ true
+ jQH+/01GQ0o0OTFEVwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQ1VQUzpNRkNKNDkxRFcAAAAAAAAAAAAAAAAAAAAAAAAWAAMArgAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9TUZDSjQ5MURXCm9yaWVudGF0aW9uPVBvcnRyYWl0CmNvcGllcz0xCmNvbGxhdGU9ZmFsc2UKbWFyZ2luZGFqdXN0bWVudD0wLDAsMCwwCmNvbG9yZGVwdGg9MjQKcHNsZXZlbD0wCnBkZmRldmljZT0xCmNvbG9yZGV2aWNlPTAKUFBEQ29udGV4RGF0YQpQYWdlU2l6ZTpBNAAAEgBDT01QQVRfRFVQTEVYX01PREUPAER1cGxleE1vZGU6Ok9mZg==
+ 1000
+ 7
+ false
+ true
+ true
+ 1
+ true
+ false
+ true
+ false
+ true
+ true
+ MFCJ491DW
+ false
+ 0
+ 3
+ true
+ false
+ false
+ false
+ true
+ false
+ true
+
+
+ Sheet1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ???
+
+
+
+ Page 1
+
+
+
+
+
+
+ ???(???)
+
+
+ 00/00/0000, 00:00:00
+
+
+
+
+ Page 1/ 99
+
+
+
+
+
+
+
+
+
+
+
+ A
+
+
+ B
+
+
+
+ D
+
+
+ E
+
+
+
+
+ 71
+
+
+ 19
+
+
+ 21
+
+
+ 73
+
+
+ 47
+
+
+
+
+ 19
+
+
+ 67
+
+
+ 7
+
+
+ 51
+
+
+ 26
+
+
+
+
+ 20
+
+
+ 57
+
+
+ 29
+
+
+ 69
+
+
+ 27
+
+
+
+
+ 17
+
+
+ 91
+
+
+ 73
+
+
+ 3
+
+
+ 45
+
+
+
+
+ 35
+
+
+ 40
+
+
+ 41
+
+
+ 66
+
+
+ 35
+
+
+
+
+ 17
+
+
+ 21
+
+
+ 14
+
+
+ 0
+
+
+ 0
+
+
+
+
+ 45
+
+
+ 72
+
+
+ 32
+
+
+ 21
+
+
+ 47
+
+
+
+
+ 29
+
+
+ 90
+
+
+ 21
+
+
+ 82
+
+
+ 5
+
+
+
+
+ 94
+
+
+ 49
+
+
+ 5
+
+
+ 22
+
+
+ 54
+
+
+
+
+ 59
+
+
+ 8
+
+
+ 11
+
+
+ 56
+
+
+ 81
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/rsc/example_no_headers.fods b/tests/rsc/example_no_headers.fods
new file mode 100644
index 0000000..a25b140
--- /dev/null
+++ b/tests/rsc/example_no_headers.fods
@@ -0,0 +1,424 @@
+
+
+
+ Lukas Jansen2019-01-27T03:31:08.9314826322019-01-27T03:33:44.899304723Lukas JansenPT2M38S2LibreOffice/7.1.5.2$Linux_X86_64 LibreOffice_project/10$Build-2
+
+
+ 0
+ 0
+ 11288
+ 4515
+
+
+ view1
+
+
+ 2
+ 10
+ 0
+ 0
+ 0
+ 0
+ 2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 100
+ 60
+ true
+ false
+
+
+ Sheet1
+ 1849
+ 0
+ 100
+ 60
+ false
+ true
+ true
+ true
+ 12632256
+ true
+ true
+ true
+ true
+ false
+ false
+ false
+ 1000
+ 1000
+ 1
+ 1
+ true
+ false
+
+
+
+
+ true
+ true
+ true
+ false
+ 1000
+ true
+ 1
+ 12632256
+ true
+ true
+ true
+ jQH+/01GQ0o0OTFEVwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQ1VQUzpNRkNKNDkxRFcAAAAAAAAAAAAAAAAAAAAAAAAWAAMArgAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9TUZDSjQ5MURXCm9yaWVudGF0aW9uPVBvcnRyYWl0CmNvcGllcz0xCmNvbGxhdGU9ZmFsc2UKbWFyZ2luZGFqdXN0bWVudD0wLDAsMCwwCmNvbG9yZGVwdGg9MjQKcHNsZXZlbD0wCnBkZmRldmljZT0xCmNvbG9yZGV2aWNlPTAKUFBEQ29udGV4RGF0YQpQYWdlU2l6ZTpBNAAAEgBDT01QQVRfRFVQTEVYX01PREUPAER1cGxleE1vZGU6Ok9mZg==
+ 1000
+ 7
+ false
+ true
+ true
+ 1
+ true
+ false
+ true
+ false
+ true
+ true
+ MFCJ491DW
+ false
+ 0
+ 3
+ true
+ false
+ false
+ false
+ true
+ false
+ true
+
+
+ Sheet1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ???
+
+
+
+ Page 1
+
+
+
+
+
+
+ ???(???)
+
+
+ 00/00/0000, 00:00:00
+
+
+
+
+ Page 1/ 99
+
+
+
+
+
+
+
+
+
+
+
+ 37
+
+
+ 94
+
+
+ 39
+
+
+ 85
+
+
+ 32
+
+
+
+
+ 66
+
+
+ 11
+
+
+ 99
+
+
+ 27
+
+
+ 41
+
+
+
+
+ 92
+
+
+ 80
+
+
+ 57
+
+
+ 57
+
+
+ 90
+
+
+
+
+ 47
+
+
+ 16
+
+
+ 58
+
+
+ 10
+
+
+ 40
+
+
+
+
+ 76
+
+
+ 4
+
+
+ 95
+
+
+ 58
+
+
+ 9
+
+
+
+
+ 18
+
+
+ 17
+
+
+ 53
+
+
+ 58
+
+
+ 57
+
+
+
+
+ 39
+
+
+ 31
+
+
+ 37
+
+
+ 90
+
+
+ 91
+
+
+
+
+ 40
+
+
+ 62
+
+
+ 10
+
+
+ 69
+
+
+ 14
+
+
+
+
+ 69
+
+
+ 15
+
+
+ 7
+
+
+ 80
+
+
+ 73
+
+
+
+
+ 99
+
+
+ 15
+
+
+ 78
+
+
+ 53
+
+
+ 79
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/rsc/mixed_dtypes.fods b/tests/rsc/mixed_dtypes.fods
new file mode 100644
index 0000000..3789653
--- /dev/null
+++ b/tests/rsc/mixed_dtypes.fods
@@ -0,0 +1,441 @@
+
+
+
+ Lukas Jansen2019-01-27T03:31:08.9314826322020-02-23T16:02:58.759849276Lukas JansenPT7M9S3LibreOffice/7.1.5.2$Linux_X86_64 LibreOffice_project/10$Build-2
+
+
+ 0
+ 0
+ 11288
+ 4967
+
+
+ view1
+
+
+ 6
+ 15
+ 0
+ 0
+ 0
+ 0
+ 2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 100
+ 60
+ true
+ false
+
+
+ Sheet1
+ 1849
+ 0
+ 100
+ 60
+ false
+ true
+ true
+ true
+ 12632256
+ true
+ true
+ true
+ true
+ false
+ false
+ false
+ 1000
+ 1000
+ 1
+ 1
+ true
+ false
+
+
+
+
+ true
+ true
+ true
+ false
+ 1000
+ true
+ 1
+ 12632256
+ true
+ true
+ true
+ jQH+/01GQ0o0OTFEVwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQ1VQUzpNRkNKNDkxRFcAAAAAAAAAAAAAAAAAAAAAAAAWAAMArgAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9TUZDSjQ5MURXCm9yaWVudGF0aW9uPVBvcnRyYWl0CmNvcGllcz0xCmNvbGxhdGU9ZmFsc2UKbWFyZ2luZGFqdXN0bWVudD0wLDAsMCwwCmNvbG9yZGVwdGg9MjQKcHNsZXZlbD0wCnBkZmRldmljZT0xCmNvbG9yZGV2aWNlPTAKUFBEQ29udGV4RGF0YQpQYWdlU2l6ZTpBNAAAEgBDT01QQVRfRFVQTEVYX01PREUPAER1cGxleE1vZGU6Ok9mZg==
+ 1000
+ 7
+ false
+ true
+ true
+ 1
+ true
+ false
+ true
+ false
+ true
+ true
+ MFCJ491DW
+ false
+ 0
+ 3
+ true
+ false
+ false
+ false
+ true
+ false
+ true
+
+
+ Sheet1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ???
+
+
+
+ Page 1
+
+
+
+
+
+
+ ???(???)
+
+
+ 00/00/0000, 00:00:00
+
+
+
+
+ Page 1/ 99
+
+
+
+
+
+
+
+
+
+
+
+ A
+
+
+ B
+
+
+ C
+
+
+ D
+
+
+ E
+
+
+
+
+ 6
+
+
+ 93
+
+
+ 43
+
+
+ 95
+
+
+ A
+
+
+
+
+ 24
+
+
+ 0.73
+
+
+ 50
+
+
+ 78
+
+
+ B
+
+
+
+
+ 13
+
+
+ 36
+
+
+ 44
+
+
+ 57
+
+
+ C
+
+
+
+
+ 10
+
+
+ 46
+
+
+ 56
+
+
+ 69
+
+
+ D
+
+
+
+
+ 34
+
+
+ S
+
+
+ 0.52
+
+
+ 34
+
+
+ E
+
+
+
+
+ 24
+
+
+ Q
+
+
+ 43
+
+
+ 93
+
+
+ A
+
+
+
+
+ 43
+
+
+ 15
+
+
+ 95
+
+
+ 89
+
+
+ B
+
+
+
+
+ 67
+
+
+ 0.89
+
+
+ 57
+
+
+ 44
+
+
+ C
+
+
+
+
+ 74
+
+
+ 26
+
+
+ 0.77
+
+
+ 3
+
+
+ D
+
+
+
+
+ 2
+
+
+ 14
+
+
+ 93
+
+
+ 54
+
+
+ E
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/test_read_ods.py b/tests/test_read_ods.py
index 4537702..772d650 100644
--- a/tests/test_read_ods.py
+++ b/tests/test_read_ods.py
@@ -19,117 +19,127 @@ mixed_dtypes_file = "mixed_dtypes.ods"
class TestOdsReader:
-
- def test_header_file_simple(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_header_file_simple(self, suffix):
path = rsc / header_file
- df = read_ods(path)
+ df = read_ods(path.with_suffix(suffix))
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
- assert (len(df.columns) == 5)
+ assert len(df.columns) == 5
- def test_header_file_with_int(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_header_file_with_int(self, suffix):
path = rsc / header_file
- df = read_ods(path, 1)
+ df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
- assert (len(df.columns) == 5)
+ assert len(df.columns) == 5
- def test_header_file_with_str(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_header_file_with_str(self, suffix):
path = rsc / header_file
- df = read_ods(path, "Sheet1")
+ df = read_ods(path.with_suffix(suffix), "Sheet1")
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
- assert (len(df.columns) == 5)
+ assert len(df.columns) == 5
- def test_header_file_with_cols(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_header_file_with_cols(self, suffix):
path = rsc / header_file
columns = ["One", "Two", "Three", "Four", "Five"]
- df = read_ods(path, "Sheet1", columns=columns)
+ df = read_ods(path.with_suffix(suffix), "Sheet1", columns=columns)
assert list(df.columns) == columns
assert len(df) == 10
- assert (len(df.columns) == 5)
+ assert len(df.columns) == 5
- def test_no_header_file_no_cols(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_no_header_file_no_cols(self, suffix):
path = rsc / no_header_file
- df = read_ods(path, 1, headers=False)
+ df = read_ods(path.with_suffix(suffix), 1, headers=False)
- assert list(df.columns) == [
- f"column.{i}" for i in range(len(df.columns))]
+ assert list(df.columns) == [f"column.{i}" for i in range(len(df.columns))]
assert len(df) == 10
- assert (len(df.columns) == 5)
+ assert len(df.columns) == 5
- def test_no_header_file_with_cols(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_no_header_file_with_cols(self, suffix):
path = rsc / no_header_file
columns = ["A", "B", "C", "D", "E"]
- df = read_ods(path, 1, headers=False, columns=columns)
+ df = read_ods(path.with_suffix(suffix), 1, headers=False, columns=columns)
assert list(df.columns) == columns
assert len(df) == 10
- def test_duplicated_column_names(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_duplicated_column_names(self, suffix):
path = rsc / duplicated_column_names_file
- df = read_ods(path, 1)
+ df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame)
assert len(df.columns) == 4
assert "website.1" in df.columns
- def test_header_file_col_len(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_header_file_col_len(self, suffix):
path = rsc / col_len_file
- df = read_ods(path, 1)
+ df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
- assert (len(df.columns) == 5)
+ assert len(df.columns) == 5
- def test_wrong_id_type(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_wrong_id_type(self, suffix):
path = rsc / header_file
with pytest.raises(ValueError) as e_info:
- read_ods(path, 1.0)
+ read_ods(path.with_suffix(suffix), 1.0)
assert e_info.match("Sheet id has to be either `str` or `int`")
- def test_non_existent_sheet(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_non_existent_sheet(self, suffix):
path = rsc / header_file
sheet_name = "No_Sheet"
with pytest.raises(KeyError) as e_info:
- read_ods(path, sheet_name)
+ read_ods(path.with_suffix(suffix), sheet_name)
assert e_info.match(f"There is no sheet named {sheet_name}")
- def test_missing_header(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_missing_header(self, suffix):
path = rsc / missing_header_file
- df = read_ods(path, 1)
+ df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
- assert (len(df.columns) == 5)
+ assert len(df.columns) == 5
assert df.columns[2] == "unnamed.1"
- def test_mixed_dtypes(self):
+ @pytest.mark.parametrize("suffix", [".ods", ".fods"])
+ def test_mixed_dtypes(self, suffix):
path = rsc / mixed_dtypes_file
- df = read_ods(path, 1)
+ df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
- assert (len(df.columns) == 5)
+ assert len(df.columns) == 5
type_list = [float, object, float, float, object]
assert df.dtypes.tolist() == type_list