|
15 | 15 | # specific language governing permissions and limitations |
16 | 16 | # under the License. |
17 | 17 | import os |
| 18 | +import re |
18 | 19 | from typing import Any |
19 | 20 |
|
20 | 21 | import pyarrow as pa |
@@ -1245,13 +1246,17 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: |
1245 | 1246 | def test_dataframe_repr_html(df) -> None: |
1246 | 1247 | output = df._repr_html_() |
1247 | 1248 |
|
1248 | | - ref_html = """<table border='1'> |
1249 | | - <tr><th>a</td><th>b</td><th>c</td></tr> |
1250 | | - <tr><td>1</td><td>4</td><td>8</td></tr> |
1251 | | - <tr><td>2</td><td>5</td><td>5</td></tr> |
1252 | | - <tr><td>3</td><td>6</td><td>8</td></tr> |
1253 | | - </table> |
1254 | | - """ |
| 1249 | + # Since we've added a fair bit of processing to the HTML output, let's just verify
| 1250 | + # that the values we expect in the table exist. Use regex and ignore everything
| 1251 | + # between the <th></th> and <td></td> elements. We also don't match the closing > of
| 1252 | + # the opening td and th tags directly, because that is where the formatting data is written.
1255 | 1253 |
|
1256 | | - # Ignore whitespace just to make this test look cleaner |
1257 | | - assert output.replace(" ", "") == ref_html.replace(" ", "") |
| 1254 | + headers = ["a", "b", "c"] |
| 1255 | + headers = [f"<th(.*?)>{v}</th>" for v in headers] |
| 1256 | + header_pattern = "(.*?)".join(headers) |
| 1257 | + assert len(re.findall(header_pattern, output, re.DOTALL)) == 1 |
| 1258 | + |
| 1259 | + body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]] |
| 1260 | + body_lines = [f"<td(.*?)>{v}</td>" for inner in body_data for v in inner] |
| 1261 | + body_pattern = "(.*?)".join(body_lines) |
| 1262 | + assert len(re.findall(body_pattern, output, re.DOTALL)) == 1 |
0 commit comments