Skip to content

Commit

Permalink
Allow parse to pass through already matched type. (#12016)
Browse files Browse the repository at this point in the history
- `Table.parse` and `Column.parse` will allow the target type to be returned unchanged.
- Fix widget for `Excel_Workbook.read_many`.
- Fix ALIAS for `DB_Table.sort`.
- Fix `Table.text_cleanse` widget by adding a default value.
- Expose `In_Any_Warn_On_Missing` for `read_many`, as requested by a user in review.
  • Loading branch information
jdunkerley authored Jan 8, 2025
1 parent b72eb92 commit da2898e
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -270,32 +270,34 @@ type Filter_Condition
builder.append (Option "Greater Than Or Equal" "..Equal_Or_Greater" [["than", comparable_types]])
builder.append (Option "Between" "..Between" [["lower", comparable_types], ["upper", comparable_types]])

if include_numeric then
builder.append (Option "Is Finite" "..Is_Finite")
builder.append (Option "Is Infinite" "..Is_Infinite")
builder.append (Option "Is NaN" "..Is_Nan")
builder.append (Option "Is Not NaN" "..Not_Nan")

if include_boolean then
builder.append (Option "Is True" "..Is_True")
builder.append (Option "Is False" "..Is_False")

if include_nullable then
builder.append (Option "Is Nothing" "..Is_Nothing")
builder.append (Option "Is Not Nothing" "..Not_Nothing")

if include_text then
builder.append (Option "Equals Ignore Case" "..Equal_Ignore_Case" [["to", Widget.Text_Input]])
builder.append (Option "Starts With" "..Starts_With" [["prefix", Widget.Text_Input]])
builder.append (Option "Ends With" "..Ends_With" [["suffix", Widget.Text_Input]])
builder.append (Option "Contains" "..Contains" [["substring", Widget.Text_Input]])
builder.append (Option "Is Empty" "..Is_Empty")
builder.append (Option "Is Not Empty" "..Not_Empty")
builder.append (Option "Like" "..Like" [["pattern", Widget.Text_Input]])

if include_nullable then
builder.append (Option "Is Nothing" "..Is_Nothing")
builder.append (Option "Is Not Nothing" "..Not_Nothing")

if include_boolean then
builder.append (Option "Is True" "..Is_True")
builder.append (Option "Is False" "..Is_False")

value_editor = Widget.Vector_Editor item_editor=equatable_types display=Display.Always item_default='""'
builder.append (Option "Is In" "..Is_In" [["values", value_editor]])

if include_numeric then
builder.append (Option "Is Finite" "..Is_Finite")
builder.append (Option "Is Infinite" "..Is_Infinite")
builder.append (Option "Is NaN" "..Is_Nan")
builder.append (Option "Is Not NaN" "..Not_Nan")

if include_text then
builder.append (Option "Is Empty" "..Is_Empty")
builder.append (Option "Is Not Empty" "..Not_Empty")

make_single_choice options display=display

## Specifies the action of a Filter_Condition.
Expand Down
12 changes: 7 additions & 5 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -1912,11 +1912,13 @@ type DB_Column
parse self type:(Value_Type | Auto) format:(Text | Data_Formatter)="" on_problems:Problem_Behavior=..Report_Warning =
if type == Auto then Error.throw (Unsupported_Database_Operation.Error "`Auto` parse type") else
if format != "" then Error.throw (Unsupported_Database_Operation.Error "Custom formatting") else
Value_Type.expect_text self <|
## In the future we may have some specific logic, for example
allowing to support formatting settings. For now, the
Database parse just boils down to a simple CAST.
self.internal_do_cast type on_problems
if self.value_type.is_same_type type then self else
Value_Type.expect_text self <|
## In the future we may have some specific logic, for example
allowing to support formatting settings. For now, the
Database parse just boils down to a simple CAST.
self.internal_do_cast type on_problems


## GROUP Standard.Base.Conversions
ICON convert
Expand Down
14 changes: 8 additions & 6 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -1021,8 +1021,7 @@ type DB_Table
Error.throw (Unsupported_Database_Operation.Error "add_group_number")


## ALIAS order_by
GROUP Standard.Base.Selections
## GROUP Standard.Base.Selections
ICON select_row

Returns a new Table that will include at most `max_rows` rows from the
Expand Down Expand Up @@ -1313,7 +1312,7 @@ type DB_Table
table = self.connection.read_statement sql column_types last_row_only=True
table.rows.first

## ALIAS sort
## ALIAS order_by
GROUP Standard.Base.Selections
ICON order
Sorts the rows of the table according to the specified columns and order.
Expand Down Expand Up @@ -2320,8 +2319,11 @@ type DB_Table
parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type:(Value_Type | Auto) format:(Text | Data_Formatter)='' error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning =
selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False . map self.make_column
selected.fold self table-> column_to_parse->
new_column = column_to_parse.parse type format on_problems
table.set new_column as=column_to_parse.name set_mode=Set_Mode.Update
column_value_type = column_to_parse.value_type
already_parsed = Auto != type && column_value_type.is_same_type type
if already_parsed then table else
new_column = column_to_parse.parse type format on_problems
table.set new_column as=column_to_parse.name set_mode=Set_Mode.Update

## GROUP Standard.Base.Conversions
ICON convert
Expand Down Expand Up @@ -3091,7 +3093,7 @@ type DB_Table
@from (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True)
@remove make_data_cleanse_vector_selector
text_cleanse : Vector (Integer | Text | Regex | By_Type) -> Vector Named_Pattern -> DB_Table
text_cleanse self from:(Vector (Integer | Text | Regex | By_Type)) remove =
text_cleanse self from:(Vector (Integer | Text | Regex | By_Type)) remove:Vector=[] =
Feature.Text_Cleanse.if_supported_else_throw self.connection.dialect "text_cleanse" <|
transformer col = col.text_cleanse remove
Table_Helpers.replace_columns_with_transformed_columns self from transformer
Expand Down
23 changes: 12 additions & 11 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -1835,17 +1835,18 @@ type Column
@format (make_format_chooser include_number=False)
parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column
parse self type:(Value_Type | Auto)=Auto format:(Text | Data_Formatter)="" on_problems:Problem_Behavior=..Report_Warning =
Value_Type.expect_text self <|
formatter = case format of
_ : Text -> if format == "" then Data_Formatter.Value else Data_Formatter.Value.with_format type format
_ : Data_Formatter -> format
parser = formatter.make_value_type_parser type
storage = self.java_column.getStorage

new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
parse_problem_aggregator = ParseProblemAggregator.make java_problem_aggregator self.name type
parser.parseColumn storage parse_problem_aggregator
Column.Value (Java_Column.new self.name new_storage)
if type != Auto && self.value_type.is_same_type type then self else
Value_Type.expect_text self <|
formatter = case format of
_ : Text -> if format == "" then Data_Formatter.Value else Data_Formatter.Value.with_format type format
_ : Data_Formatter -> format
parser = formatter.make_value_type_parser type
storage = self.java_column.getStorage

new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
parse_problem_aggregator = ParseProblemAggregator.make java_problem_aggregator self.name type
parser.parseColumn storage parse_problem_aggregator
Column.Value (Java_Column.new self.name new_storage)

## GROUP Standard.Base.Conversions
ICON convert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ type Columns_To_Keep
padded with `Nothing` and a problem is reported.
In_List (column_names : Vector Text = Missing_Argument.throw "column_names")

## PRIVATE
Same as `In_Any`, but it will warn about columns that are not present in
all tables.
## All columns are kept, but a warning is reported for columns that are
not present in all tables.

If a column is present only in some of the tables, it is padded with
`Nothing` for tables where it is missing.
In_Any_Warn_On_Missing

## PRIVATE
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from Standard.Base import all
import Standard.Base.Data.Read.Return_As.Return_As as Return_As_Base
import Standard.Base.Data.Vector.No_Wrap
import Standard.Base.Errors.Common.Type_Error
import Standard.Base.Errors.Deprecated.Deprecated
Expand All @@ -12,6 +13,7 @@ import Standard.Base.Runtime.Ref.Ref
import Standard.Base.System.File.Advanced.Temporary_File.Temporary_File
import Standard.Base.System.Input_Stream.Input_Stream
from Standard.Base.Data.Filter_Condition import sql_like_to_regex
from Standard.Base.Metadata import make_single_choice
from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Multiple_Choice, Single_Choice

Expand Down Expand Up @@ -359,7 +361,8 @@ type Excel_Workbook
- on_problems: how to handle problems during reading. Defaults to
`Report_Warning`.
@sheet_names build_sheet_selector
read_many : Vector Text -> Headers -> Return_As_Table -> Problem_Behavior -> Table
@return make_single_choice Return_As_Table.get_dropdown_options
read_many : Vector Text -> Headers -> Return_As_Base -> Problem_Behavior -> Table
read_many self sheet_names:Vector=self.sheet_names (headers:Headers=..Detect_Headers) (return=..As_Merged_Table) (on_problems:Problem_Behavior=..Report_Warning) =
resolved_return = _resolve_return_as_compatibility return
if sheet_names.is_empty then Error.throw (Illegal_Argument.Error "No sheets selected.") else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,20 +128,21 @@ make_filter_condition_selector table display:Display=..Always =
builder.append (Option "Starts With" "..Starts_With" [["prefix", with_text]])
builder.append (Option "Ends With" "..Ends_With" [["suffix", with_text]])
builder.append (Option "Contains" "..Contains" [["substring", with_text]])
builder.append (Option "Like" "..Like" [["pattern", with_text]])
builder.append (Option "Is Nothing" "..Is_Nothing")
builder.append (Option "Is Not Nothing" "..Not_Nothing")
builder.append (Option "Is True" "..Is_True")
builder.append (Option "Is False" "..Is_False")

value_editor = Widget.Vector_Editor item_editor=equatable_types display=Display.Always item_default='""'
builder.append (Option "Is In" "..Is_In" [["values", value_editor]])

builder.append (Option "Is Finite" "..Is_Finite")
builder.append (Option "Is Infinite" "..Is_Infinite")
builder.append (Option "Is NaN" "..Is_Nan")
builder.append (Option "Is Not NaN" "..Not_Nan")
builder.append (Option "Is True" "..Is_True")
builder.append (Option "Is False" "..Is_False")
builder.append (Option "Is Empty" "..Is_Empty")
builder.append (Option "Is Not Empty" "..Not_Empty")
builder.append (Option "Like" "..Like" [["pattern", with_text]])

value_editor = Widget.Vector_Editor item_editor=equatable_types display=Display.Always item_default='""'
builder.append (Option "Is In" "..Is_In" [["values", value_editor]])
Single_Choice options display=display

## PRIVATE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ type Return_As_Table
Each file is loaded according to the provided/detected format. If the
format reads it as something other than a table, then it is expanded in
the same way as `Table.from_objects`.
@columns_to_keep Columns_To_Keep.default_widget
As_Merged_Table (columns_to_keep : Columns_To_Keep = Columns_To_Keep.In_Any) (match : Match_Columns = Match_Columns.By_Name)
As_Merged_Table (columns_to_keep : Columns_To_Keep = ..In_Any) (match : Match_Columns = ..By_Name)

## PRIVATE
get_dropdown_options : Vector Option
Expand Down
17 changes: 10 additions & 7 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -1559,12 +1559,15 @@ type Table
Dictionary.from_vector <| selected_columns.map c-> [c.name, True]

new_columns = self.columns.map on_problems=No_Wrap.Value column-> if selected_column_names.contains_key column.name . not then column else
Value_Type.expect_text column <|
storage = column.java_column.getStorage
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
parse_problem_aggregator = ParseProblemAggregator.make java_problem_aggregator column.name type
parser.parseColumn storage parse_problem_aggregator
Column.Value (Java_Column.new column.name new_storage)
column_value_type = column.value_type
already_parsed = Auto != type && column_value_type.is_same_type type
if already_parsed then column else
Value_Type.expect_text column <|
storage = column.java_column.getStorage
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
parse_problem_aggregator = ParseProblemAggregator.make java_problem_aggregator column.name type
parser.parseColumn storage parse_problem_aggregator
Column.Value (Java_Column.new column.name new_storage)
Table.new new_columns

## GROUP Standard.Base.Conversions
Expand Down Expand Up @@ -3740,7 +3743,7 @@ type Table
@from (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True)
@remove make_data_cleanse_vector_selector
text_cleanse : Vector (Integer | Text | Regex | By_Type) -> Vector Named_Pattern -> Table
text_cleanse self from:(Vector (Integer | Text | Regex | By_Type)) remove =
text_cleanse self from:(Vector (Integer | Text | Regex | By_Type)) remove:Vector=[] =
transformer col = col.text_cleanse remove
Table_Helpers.replace_columns_with_transformed_columns self from transformer

Expand Down
41 changes: 33 additions & 8 deletions test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -557,37 +557,56 @@ add_conversion_specs suite_builder setup =
suite_builder.group prefix+"(Conversion_Spec) Simple variant of Table/Column.parse in all backends" group_builder->
group_builder.specify "should be able to parse simple integers" <|
t = table_builder [["X", ["42", "0", "-1"]]]
v = [42, 0, -1]

c1 = t.at "X" . parse Value_Type.Integer
setup.expect_integer_type c1
c1.to_vector . should_equal [42, 0, -1]
c1.to_vector . should_equal v

c2 = t.parse ["X"] Value_Type.Integer . at "X"
setup.expect_integer_type c2
c2.to_vector . should_equal [42, 0, -1]
c2.to_vector . should_equal v

t2 = table_builder [["X", v]]
c3 = t2.parse ["X"] Value_Type.Integer . at "X"
setup.expect_integer_type c3
c3.to_vector . should_equal v

group_builder.specify "should be able to parse simple floats" <|
t = table_builder [["X", ["42.5", "0.25", "-1.0"]]]
v = [42.5, 0.25, -1.0]

c1 = t.at "X" . parse Value_Type.Float
c1.value_type.is_floating_point . should_be_true
c1.to_vector . should_equal [42.5, 0.25, -1.0]
c1.to_vector . should_equal v

c2 = t.parse ["X"] Value_Type.Float . at "X"
c2.value_type.is_floating_point . should_be_true
c2.to_vector . should_equal [42.5, 0.25, -1.0]
c2.to_vector . should_equal v

t2 = table_builder [["X", v]]
c3 = t2.parse ["X"] Value_Type.Float . at "X"
c3.value_type.is_floating_point . should_be_true
c3.to_vector . should_equal v

if supports_dates then
group_builder.specify "should be able to parse dates using a default format" <|
t = table_builder [["X", ["2018-01-01", "2023-12-31"]]]
v = [Date.new 2018 1 1, Date.new 2023 12 31]

c1 = t.at "X" . parse Value_Type.Date
c1.value_type.should_equal Value_Type.Date
c1.to_vector . should_equal [Date.new 2018 1 1, Date.new 2023 12 31]
c1.to_vector . should_equal v

c2 = t.parse ["X"] Value_Type.Date . at "X"
c2.value_type.should_equal Value_Type.Date
c2.to_vector . should_equal [Date.new 2018 1 1, Date.new 2023 12 31]
c2.to_vector . should_equal v

t2 = table_builder [["X", v]]
c3 = t2.parse ["X"] Value_Type.Date . at "X"
c3.value_type.should_equal Value_Type.Date
c3.to_vector . should_equal v

if supports_dates.not then
group_builder.specify "should report that date parsing is unsupported" <|
t = table_builder [["X", ["2018-01-01", "2023-12-31"]]]
Expand All @@ -600,14 +619,20 @@ add_conversion_specs suite_builder setup =

group_builder.specify "should be able to parse booleans with default format" <|
t = table_builder [["X", ["true", "false", "true"]]]
v = [True, False, True]

c1 = t.at "X" . parse Value_Type.Boolean
c1.value_type.should_equal Value_Type.Boolean
c1.to_vector . should_equal [True, False, True]
c1.to_vector . should_equal v

c2 = t.parse ["X"] Value_Type.Boolean . at "X"
c2.value_type.should_equal Value_Type.Boolean
c2.to_vector . should_equal [True, False, True]
c2.to_vector . should_equal v

t2 = table_builder [["X", v]]
c3 = t2.parse ["X"] Value_Type.Boolean . at "X"
c3.value_type.should_equal Value_Type.Boolean
c3.to_vector . should_equal v

group_builder.specify "should report missing columns" <|
t = table_builder [["X", ["42", "0", "-1"]]]
Expand Down

0 comments on commit da2898e

Please sign in to comment.