From d744ad526c564d9e1d51e67013a754869948ee0c Mon Sep 17 00:00:00 2001 From: iuvbio Date: Thu, 6 Jun 2019 12:00:28 +0200 Subject: [PATCH 1/3] add test for file with differing column lengths --- .../tests/rsc/example_col_lengths.ods | Bin 0 -> 10388 bytes pandas_ods_reader/tests/test_read_ods.py | 7 +++++++ 2 files changed, 7 insertions(+) create mode 100644 pandas_ods_reader/tests/rsc/example_col_lengths.ods diff --git a/pandas_ods_reader/tests/rsc/example_col_lengths.ods b/pandas_ods_reader/tests/rsc/example_col_lengths.ods new file mode 100644 index 0000000000000000000000000000000000000000..9dd3fa7ac1f3883097607c603e67d83e92720a52 GIT binary patch literal 10388 zcmc(FbyQr-vn~V(1PO#7!97TDLU0HYEVx_n!5Lr}bO`Pi9D)t*?(VKZgAeWyoDc{+ z_}zPM&N(mVth?SnuV(F;-h1Zzdb+w-RdrP>yhlL9frCSVgL9JWk@K_S31)_agL}B{ zAHi9hTLZu@wg3ZLTPt%T1F*Rbki`*b%xq&|Z*I?QV+#Np+ZZ`m1At&=ds`5|z}Vgl z001leg)_<(#W)HT4({Q4anGq@=3s3IG%&ZaX954wWVQvG1}Vr%V4xD-{|XL6N>WVe z{xkdjxQYhjDp&GB`swcX)SqK&3mYlvWl9vnuea1so~ zv97+6mJv|Z&_Npj(y@dXtIGhi)eQCZtPHh50Bu)*GQ{$Ov7wQXiKVfbrKO31wHd(D z(h^|hU@2|c_FxAWu(KNkZ0G9i2!TL!fS$UxzPk2-CXW7= z?osv-UmMS8h?hUeI|l5R;24&B;{5nf>!lqRduD;3})947F4$v*LM~+4&*kCFR_t#|)Hr}_Z##@Uz zJ6Zhh8oRnWyZeU+d)kKjI|uvwI!C5EzAOw* zEKCe_j*Se>j&#h8_f1bvk9}R3nwg%QU0R%-gu!6_GwT!ctK+b37;JfL<#1u+;LG~u z;_k`nWY_Z7k+s>OgW2wb`M$OJ@io}o_R`qi+U)7d$l3bD>DK)D-qPyE*6POg>h{6z z=IZ|T`tI)T+TO{|;qk`N<<`a3-tqa~+11YX>yv}c^W(j%v;DKvljDo4v#Xn{i{qQC z^V{3oxa&B5I5=ckDKTLc=edJH z*L2jB-BRQxsN7%ERdJxhID%Xb?85%ax!k!Zf$$V3U#6@Xo~e1SMi7vsy&4ypuOuA4 zRk8;%_h<0vDc$k@Jp2S{g&a0-Rf;aV)cNzX3^}9A%agmXddPg2^M)0Sjwf%w1az@6n$POyh2M!vbR`P2L?7@W z-#rwf-%_6=4S${_%vHi=n_w8=%LOZ`Ua$H`Kgmfv)!*p;D25`G6qYi#fik%o|Ed%I_&tq zc!TNEmYb{+ld`Xi)l7uH=&`d(H+3>z&Pc^PRwEDnGMKC4{~dRZU{tofr*s%uRt43k zLKWy(SiPkKp+4`;-MA9#i&!ziqyopY7e%msOLLjW+xzGyZE9W3czL*68o4qxL7r3R zV{qWL5TG5s051rDSn0@XLz#DRHXsYO*UO9<>hHaTqm4J}A#IR@|=#7OH&qR6JU0#$HO-DpTrNZs)7v28;TT zn)OcMw+wn$$o0)J4q@R-#RjuTK2TtB?lyx^h3gpN`p%Bb3+{QcF^XD3=XB}mhar8G=ACs)YdIZjcjk{_gIJO8jL;%opd|KyNkMHVa*&L=I&{)_5=aI z%&c<3m&A&3`oicF&EAnXGz$kbJ)@T^aNA5|##0i-YL^y>{=Cn?=<{p33KPS*=&*VQ z7ad_k24*ciWfOIZSF)xWkVi2PoeUAk_n6OU2q(|+rGv|lc_1d&((FeAL!~W}g%hd+ z+}9vj&$`sRS^AlUT?2F>6h~)3YR5?Ye}>+94SVv&5>d!4V(xp$nu+DNU)0^lj(#Zhh?A zwiWNxlPY@O7g^m{!vMF!rmzdG=S#FLTA!^aY@%Ro;+6av4_@ZwfztQ;o{Ay1dk*Fw!neO1>uuboEhGsr7n zZj!+=g>9vsJX9C@f`})VapnUW<>L+d&$`qz)0o~M(hJ;fsaX?b2-9Ft278@03(A{! zK55|YIJ-HdDLyuH#@P1fis?_{I6+t@m{y$ugs5+EJ)jRusYQT?6%OAe_q&U*SrL{m zEOO-orlnXsL4|7)x6M_9Z`Na7z}fhzh%sS>kvJOFW8NkG8KlwE*nP+FbMw`GnaWSk zhSthIXTbQi@FV)uLKiR{J6%idr-Sq-N_ywD7J#ppzwEkgkT$W?$_fn>)-xn1?*OIT znP&jg>x+m5+@`cmiYlWq^S5YyewI}jqP*H0^!Q|>N z8l_U!?Mm^(@59fq>&?THg$Z0Hm$zwAnp!hGHPtCrt&JIe6vFAa7or#{Fhma)`yc3Z zTsPKD??j{sJ^AR$faxWt_c)tg-WKcJk=KraDhXrSi-k#4ybwF|OaFMhX?Ij8v0kg2 zT=vgS;F)NPs9CjiaOw5-)aZ*%XQx^Ktwa2E!Zl1N;kXfWka#;6i#HIT<>n|C2fEd* z6kt^-d?MCr|6xN9F2XssnkAD*9!H8Pw=?Q6hTLON1=k8Rl~QpPuINIQ0NKt6VUl{T z>75^Sj(I0&Oj5{R@p-~5cm~vw~?b%ttHa1p<1|XJyMv?snWo~U? z3b1DpF$Y^4*xLUK_;+2JSlJkW0sqz|;cvRMwXt=uy^q-){-w9y;q1W%UcKkRv&t*sWd2SM|1a279Laqr9Iet3%`)tsj>LS5cyX3eq>o?{a8#(qiO z4o}{Hd;KU&GBP-8;dPCH1C>NuJwEcKtW>@8T>_h_tZx=SwBEh+8!z=I7m?HMp`UxF z-^^B9T5oo|bLqqSU#}c9P}PgDjmF;YneFJv6)t{m#J-9#lG=FPOAx7yEAJwFqGd5B zEHXGMPMZv)5pl_AuW*vJWP7ZPDtsjOOQ8iO4DDNoKm~LZkSJMl3 z(df9(>=v-e9LtcCT03jynpD=?-Eo>pcQUZ(Z@aJ@9IH>m5G2qM$Rp$GK6+_kC642N z!ZGtE+bwoD2rlXItD9JLG~q1&?Y+b{UwVr{@Tz4qoWxgGyRM;QY&6^C*kxdz?-PEN zjx44mgKGX&Sddd8*I@j7(g&R&bl13cM;Tl+EzeTT^!bzIQ~!czc~Q+-L}%9srLHg$ z8@UcHZw5-Oqwm!GJ&ZuckxEhV32xW~irDna^(1O`{|E`hN2PPXHg=wZAJF-v0B0%o zCWHwapJ&6_9akgl9o#G1vH_{&NcaV8GN>#qbK1h4*7@v?P2hw@!UilU5LUsCZz?kt zZ_rp`)bMD^oGULWWhr9y-i{P}>o)?K=%~5H##I(PSr6$e2KfSZ1p*&_JTZkzKTY~K z2WDmxtZR3|8#TGrufeDSG({t~o6^Wrw&CpgKElWXEICX&Kp566l}ySLaB9{y;boLy z9J@+@+x68F2c)k`cJW0GZFP@dTXT`APXV4}six$4sHYO~^O%T){=txf9g^vVUl%?o4h=NOH;Auu0!!(jZ z7!p(F==EdHmaKt#O3UOmzc`)|YfKc1OKRMHci%-4^`_T;r#qj7QUc^h1T>3Y;7Mqb zK6idov&431^YvKTZgJc!R9Zk*Kc+OLv3tSh>HJc95q^VcrD|MbwK3!wF7HT?LX0Hs z#zZ~&ceKRkBqv(wJPp%zel})?4VGAE{?UxDu4=cFSMi$gYHh#l1;Bcl+?ar!?xkcO zF}du9a=mRX0(WBH6j}^2GC%pi=+I7h=IdK0DSBG#U=i~51QN|Z4zkT0qzE0uNJ z>yB#f8jda;gzn7A^vf1DL~@*xtqahe_r`z|F&~+s&{YH zLr)L(bIxPy`_EgO!}RN{P@}2E&-Ix{M@X-1?OPSUf82BfCf=fmO_0rI6L@KeJ4%J? zGfKpnKh=-q;U@WP!+qX^@CB(Cq0v2|tpi}lqtjDnOQ6kcOi>i!SA#|{lsGo~5@!3T zVcFKOUaiwCoN)sE5!NtynGThHwv|@8Zk`diyl3o((ntk@=$gqCDRh<%@x+8)rDlY_)67f{p@U*mbc^+ z+APE`gkDd5ySOWy-e^^LkBssKsd*NL1P8ar2?zII&i!|O;Xmmz0NCKcDTGB2hbOV( z_+5TP6*EQac}uS08`-d(kysF$7}*d)eErm+x14GoeP!bI?R6o!y%qc7rWyE#sCDV1CRxr0l5Yt<}tqF!yn?EZlK$V?B+SngR;iIAr z=0L<2M-H(K9qR~xa>q5JQJP09rF@uirBtLU3$@BK( zi=lZv5@*6UgFE4DiLR(rF}B_VfdpxCNt^kQ>^I-VVv$rc8v|{U)jGG$Z2ex!+Tzl1 zOAmgGibRh#CQ>IDsWp$xK24qvPvoeuVDh*5#5SuR6>p~-qZ;%+dCII^{MNlswOs;1 z5K+y#i;sigjo#(X7P?20{f%=(lI_xyE3zEz)!8m~X#( zti(KL!V&U=o0kcMoslEXRr~4ir}?nW!-RXk3NLMhzfU^eXCD7~!XFZq_Fxw)fc<~s z04-~q`8Qa%EqWArZDsJ$^s`b#mgPRhA1ldR}Of%!#DTPJ4mg|yq z=cgEkkEyKN_cm@Fe$Ua-UF=zyQ~1ml$!KM>I)^74h@yebwBIV!cO+3L_8?84_Z`V3u8FlnJ|ku+CAy>h6s@gL9*ttel8tP`(x zrWS@fJ>$>VLLgKD|1}#SsMg4j6OX$%B*3uf$hs#*fm^JLUabhw&1Bl2>({KraQZV6U(f$e2(!&MolApgaZMXGr;%)%21icOk`9|82Amz6)j~G zT5IFzLbFzGWNzidK&i<)H%$B?9$!Qy$HJ>=CahlhtAOa0Jx22)8caGTuc*CWm%BY# zB{Zb&)9W@D6s)I<+c7L-N_+x&SHx0@sQC<{9mmlmWgNxQ4%vo+It2~2zT)gCuIc(C z=)$uiW86gIwWEagv+lQBeV)6gUv=R}-w`Uu!hLB>vI3|uA*XB53hO%Z5S+cqj@n@! z3QK@}Q5b!}{8T}+h=LO%EGSk{S513Esg#f+ncH6DkgiR9<3cm_KY)Pm#qq}_vX^CWLMl}ok z+BBmZn~3HkyTj`X0>&TVyeF1QE=k5YiI?uymydITC#(vbe4>$Kn^9}J1O}1z@}WA+ zIUFPT5 zei`JVqoz$gJFDh=`{qb{7teBQ~aL`R#c^^o~#!xX@txaC&9IR<*@)>3Q5guEt zF!#_~0p7vyU}Wdmc_U2I$AoydVTR3xFe)omO;zjTl+8BLe4`Ujw3!nD`{CX!{~*O4 zK_BlO%>oPSD}N!cVdTxmO(wt?s>vKtyN?#mAw~Al-rD5P?O5iwc$OsMO6$NgKb{QA$cz$FhjOVZdckX@vgOs)|?Bb9D-nlZzJ$Ig) z?jVi{7-H$6rAM)dUXVtZ-y zm|PogJDN`S6`SV^_FV;3Sf_z|3Q1vy2QDSIq?5rk1*cqK{xMtjcEc$2awxZ#vD>mi z4HQqAOPQVUy(fYl3QdlI9`{#@O|V5_iMXmS8eyMc#`UN3^}nIli#2MMq+j1LMO@oT z&zTgAWuD6iSrMG1t?Q!_G2Ks9UrL48huc@MXw3?S&9E8c~!Qn4VV}mW07znF#KGqhUqi%nm z0iDLjZ_1;MuR7Iayp~`oEh%AEY%DjE(x$HJDDj!mVa0O{beVR>$f(a3zk7eJ#BWx4 zYPpfE9A--vFQg)` zUFEeAJTw`~0i_e^8%201P$lRY>+8VvoDdgSP~uGy_vFO~$u6D%kv{qx8D`ohY)o}rXe25U@i8hq^E$49dLD8= zP;=v8Uz(cHy`nv^s9-<|rBsOyQTT|t&RJqMLt*lDF;>`N&RopU*s6l!@qTRvT{aZS z56uujX+Wde%-98utyHIhCFEpHIy%l+w>q9Okh}9qJZea{9?>NWC{V?vuc#1b{cgoc zW4`#+vL`T;z@OJgUs3?TMOTwbEG9)aIeLePzKlr0|B-sZhXx?XusI)v*Q-U+>O@(DGmbP&9k*NN`^X`u&8I=d zl$#uwWc_0)eL5d&au{nSW0FKVMn-@akpyA?TV{`N38^JQnWq8hC$sC7q^-S_&r{KB z7l=5$zb74MyPLDIEtWRnb6(N9XYv)L2}XqcZAfLc<`tW8r#J>rFk#aih=b|gGJqqCug{0NNA26fD2^c$t39(LObZ$G?|D-`rG*s}BOB%$o_4i*1#x}Qng z^-VDnA1p{gmhpxzhjHwK^hSu%PC;^e`KEph^RkjXjOH;$0~U7_@&+ZfG&TFgx~Kaf z)QzH^>UHg5s3EZ9=a1B&h`zPP3P_s)GDW3Ph;I*3a{W z#v%Pn?RqIz^7xV10Y>AO0_^h#owO4n&sStUHq$s)^LI@VQ;TlOOP+V`5p@k0TfKWh zvrV)3{%&7zD|VVzep!Lo-R`;25dli`jw#SKD-vlYL9x}i%>_gRqV}6EuQJ!84B_Lo zgfkRP7S%+wy zk81vp*H*PXUbpkjkGqfc&sI6D)Rl!zn^Fq8N7C{ZFB1#H#%f@4Dce6XnOpYLv-jR} zxH+wpO-x0MnHP@cw3tWyz>_Iav2-oO=v$oIVJr`;PA%UZ9{%K0*c-`aU6+{5aY0i5 zSmt%BP)Vt^p%s)?!@91yda-@RU6E_$u5!d=2P~erQB(hcaB=kIL9uE_%apkXC`Z5n zZ}s)ma(e!0uQ*nhWmWMM0kWKFx_~?J!u%p>Z4O>)zl&09>;~bC-LZsaUG%eI7B4Bx z%YYw0^FG*MNGDF(xDgy!E@Pz9J|&H{oh5WV*R4B#*UY05;?q8pKy+}dKSL<={`x#@ zI^1f+0^cKY_-$w{*I-W+p|erWpMhCQOoM7L0a z%Z7x69zq=^e_zL%EF<-2oO3;qro>X%`R9tNEsck=UFY znlz!4A9^5!i07Td;l4R14INy1WBB9lO$yg!4143<;Jg%%BnW(lb6vyO<^w3BW_ zo}&_5^kNFUB=CVjG;{$*haL)=@rbhTNy{LfhL9<*7rP-k7o=eaAYxNV%j5MldMXy4 zQ!5ufbqy2UlJN*dMc3u`8_&8GbhG9W^5a8ZHgHRpnl`5%=M6t!i7%<$*nX>WdscZw zbo{N?#smIA>$p7d#T@sJj}h@f>+p|o;Qsk&(7o#aB!3(f`aA0Hhkt%SRPLMqbQI|C zEWc3yb%f~GJ@I`y{+|mme;vUu6dc?`Ddv|b{f6cLD$M*d*+T*1mwfsS*{?#)KeId( zBz{TvZ&?0ZxcT4m{QM2ip9?wv%=1vn`6Zn9;m+R`ZTpwP&i|Il<~K}#E%f{|(_b~6 z{~M-Xg`a&|13Cr{Q zIQ+jehkr-?Ig)=!IQ3wi2fS&{~h^f%llU(ImurG-oGRN jY&{+<^DoJy{0BR&@E+-Yn*s;-?EVF~H#!8=4|o3u+k`3G literal 0 HcmV?d00001 diff --git a/pandas_ods_reader/tests/test_read_ods.py b/pandas_ods_reader/tests/test_read_ods.py index 4b7e843..a46ee68 100644 --- a/pandas_ods_reader/tests/test_read_ods.py +++ b/pandas_ods_reader/tests/test_read_ods.py @@ -10,6 +10,7 @@ rsc = os.path.join(root, "rsc") header_file = "example_headers.ods" no_header_file = "example_no_headers.ods" +col_len_file = "example_col_lengths.ods" class TestOdsReader(object): @@ -45,3 +46,9 @@ class TestOdsReader(object): df = read_ods(path, 1, headers=False, columns=columns) assert list(df.columns) == columns assert len(df) == 10 + + def test_header_file_col_len(self): + path = os.path.join(rsc, col_len_file) + df = read_ods(path, 1) + assert isinstance(df, pd.DataFrame) + assert len(df) == 10 From 9b4a272fd48cd59aa4607a40c430febede3fe88c Mon Sep 17 00:00:00 2001 From: iuvbio Date: Sat, 8 Jun 2019 14:00:46 +0200 Subject: [PATCH 2/3] exception handling and cosmetic changes --- pandas_ods_reader/parser.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index 1334eae..8b0ffbb 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -5,16 +5,16 @@ import pandas as pd from .tools import sanitize_df -def load_ods(doc, sheet, headers=True, columns=None): +def load_ods(doc, sheet_id, headers=True, columns=None): # convert the sheet to a pandas.DataFrame - if isinstance(sheet, int): - sheet = doc.sheets[sheet - 1] - elif isinstance(sheet, str): + if not isinstance(sheet_id, (int, str)): + raise ValueError("Sheet id has to be either `str` or `int`") + if isinstance(sheet_id, str): sheets = [sheet.name for sheet in doc.sheets] - if sheet not in sheets: - raise ValueError("There is no sheet named {}".format(sheet)) - sheet_idx = sheets.index(sheet) - sheet = doc.sheets[sheet_idx] + if sheet_id not in sheets: + raise ValueError("There is no sheet named {}".format(sheet_id)) + sheet_id = sheets.index(sheet_id) + 1 + sheet = doc.sheets[sheet_id - 1] df_dict = {} col_index = {} for i, row in enumerate(sheet.rows()): @@ -28,7 +28,7 @@ def load_ods(doc, sheet, headers=True, columns=None): continue elif i == 0: columns = columns if columns else ( - ["column_%s" % j for j in range(len(row))]) + [f"column_{j}" for j in range(len(row))]) # columns as lists in a dictionary df_dict = {column: [] for column in columns} # create index for the column headers @@ -49,8 +49,8 @@ def load_ods(doc, sheet, headers=True, columns=None): def read_ods(file_or_path, sheet, headers=True, columns=None): """ This function reads in the provided ods file and converts it to a - dictionary. The dictionary is converted to a DataFrame. Empty rows and - columns are dropped from the DataFrame, before it is returned. + dictionary. The dictionary is converted to a DataFrame. Trailing empty rows + and columns are dropped from the DataFrame, before it is returned. :param file_or_path: str the path to the ODS file From 46b31a9e8c5eecf6d00700e89a9e2b5f530cec52 Mon Sep 17 00:00:00 2001 From: iuvbio Date: Sat, 8 Jun 2019 14:01:10 +0200 Subject: [PATCH 3/3] additional checks for number of columns --- pandas_ods_reader/tests/test_read_ods.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas_ods_reader/tests/test_read_ods.py b/pandas_ods_reader/tests/test_read_ods.py index a46ee68..1e59e85 100644 --- a/pandas_ods_reader/tests/test_read_ods.py +++ b/pandas_ods_reader/tests/test_read_ods.py @@ -19,12 +19,14 @@ class TestOdsReader(object): df = read_ods(path, 1) assert isinstance(df, pd.DataFrame) assert len(df) == 10 + assert (len(df.columns) == 5) def test_header_file_with_str(self): path = os.path.join(rsc, header_file) df = read_ods(path, "Sheet1") assert isinstance(df, pd.DataFrame) assert len(df) == 10 + assert (len(df.columns) == 5) def test_header_file_with_cols(self): path = os.path.join(rsc, header_file) @@ -32,13 +34,15 @@ class TestOdsReader(object): df = read_ods(path, "Sheet1", columns=columns) assert list(df.columns) == columns assert len(df) == 10 + assert (len(df.columns) == 5) def test_no_header_file_no_cols(self): path = os.path.join(rsc, no_header_file) df = read_ods(path, 1, headers=False) assert list(df.columns) == [ - "column_%s" % i for i in range(len(df.columns))] + f"column_{i}" for i in range(len(df.columns))] assert len(df) == 10 + assert (len(df.columns) == 5) def test_no_header_file_with_cols(self): path = os.path.join(rsc, no_header_file) @@ -46,9 +50,11 @@ class TestOdsReader(object): df = read_ods(path, 1, headers=False, columns=columns) assert list(df.columns) == columns assert len(df) == 10 + assert (len(df.columns) == 5) def test_header_file_col_len(self): path = os.path.join(rsc, col_len_file) df = read_ods(path, 1) assert isinstance(df, pd.DataFrame) assert len(df) == 10 + assert (len(df.columns) == 5)