Merge branch 'master' of https://github.com/iuvbio/pandas_ods_reader

2019-06-08 14:21:34 +02:00 · 2019-06-08 14:21:34 +02:00 · d8d71f610d
parent 9240a1b3ae 46b31a9e8c
commit d8d71f610d
3 changed files with 24 additions and 12 deletions
--- a/pandas_ods_reader/parser.py
+++ b/pandas_ods_reader/parser.py
@@ -6,16 +6,16 @@ from collections import OrderedDict
 from .tools import sanitize_df


-def load_ods(doc, sheet, headers=True, columns=None):
+def load_ods(doc, sheet_id, headers=True, columns=None):
    # convert the sheet to a pandas.DataFrame
-    if isinstance(sheet, int):
-        sheet = doc.sheets[sheet - 1]
-    elif isinstance(sheet, str):
+    if not isinstance(sheet_id, (int, str)):
+        raise ValueError("Sheet id has to be either `str` or `int`")
+    if isinstance(sheet_id, str):
        sheets = [sheet.name for sheet in doc.sheets]
-        if sheet not in sheets:
-            raise ValueError("There is no sheet named {}".format(sheet))
-        sheet_idx = sheets.index(sheet)
-        sheet = doc.sheets[sheet_idx]
+        if sheet_id not in sheets:
+            raise ValueError("There is no sheet named {}".format(sheet_id))
+        sheet_id = sheets.index(sheet_id) + 1
+    sheet = doc.sheets[sheet_id - 1]
    df_dict = OrderedDict()
    col_index = {}
    for i, row in enumerate(sheet.rows()):
@@ -42,7 +42,7 @@ def load_ods(doc, sheet, headers=True, columns=None):
            continue
        elif i == 0:
            columns = columns if columns else (
-                ["column_%s" % j for j in range(len(row))])
+                [f"column_{j}" for j in range(len(row))])
            # columns as lists in a dictionary
            df_dict = OrderedDict((column, []) for column in columns)
            # create index for the column headers
@@ -62,8 +62,8 @@ def load_ods(doc, sheet, headers=True, columns=None):
 def read_ods(file_or_path, sheet, headers=True, columns=None):
    """
    This function reads in the provided ods file and converts it to a
-    dictionary. The dictionary is converted to a DataFrame. Empty rows and
-    columns are dropped from the DataFrame, before it is returned.
+    dictionary. The dictionary is converted to a DataFrame. Trailing empty rows
+    and columns are dropped from the DataFrame, before it is returned.

    :param file_or_path: str
    the path to the ODS file
--- a/pandas_ods_reader/tests/rsc/example_col_lengths.ods
+++ b/pandas_ods_reader/tests/rsc/example_col_lengths.ods
--- a/pandas_ods_reader/tests/test_read_ods.py
+++ b/pandas_ods_reader/tests/test_read_ods.py
@@ -11,6 +11,7 @@ rsc = os.path.join(root, "rsc")
 header_file = "example_headers.ods"
 no_header_file = "example_no_headers.ods"
 duplicated_column_names_file = "example_duplicated_column_names.ods"
+col_len_file = "example_col_lengths.ods"


 class TestOdsReader(object):
@@ -19,12 +20,14 @@ class TestOdsReader(object):
        df = read_ods(path, 1)
        assert isinstance(df, pd.DataFrame)
        assert len(df) == 10
+        assert (len(df.columns) == 5)

    def test_header_file_with_str(self):
        path = os.path.join(rsc, header_file)
        df = read_ods(path, "Sheet1")
        assert isinstance(df, pd.DataFrame)
        assert len(df) == 10
+        assert (len(df.columns) == 5)

    def test_header_file_with_cols(self):
        path = os.path.join(rsc, header_file)
@@ -32,13 +35,15 @@ class TestOdsReader(object):
        df = read_ods(path, "Sheet1", columns=columns)
        assert list(df.columns) == columns
        assert len(df) == 10
+        assert (len(df.columns) == 5)

    def test_no_header_file_no_cols(self):
        path = os.path.join(rsc, no_header_file)
        df = read_ods(path, 1, headers=False)
        assert list(df.columns) == [
-            "column_%s" % i for i in range(len(df.columns))]
+            f"column_{i}" for i in range(len(df.columns))]
        assert len(df) == 10
+        assert (len(df.columns) == 5)

    def test_no_header_file_with_cols(self):
        path = os.path.join(rsc, no_header_file)
@@ -53,3 +58,10 @@ class TestOdsReader(object):
        assert isinstance(df, pd.DataFrame)
        assert len(df.columns) == 4
        assert "website.1" in df.columns
+
+    def test_header_file_col_len(self):
+        path = os.path.join(rsc, col_len_file)
+        df = read_ods(path, 1)
+        assert isinstance(df, pd.DataFrame)
+        assert len(df) == 10
+        assert (len(df.columns) == 5)