Skip to content

Commit

Permalink
In-Memory Refactor Part 3 (#12096)
Browse files Browse the repository at this point in the history
- Single method to make `Column` objects.
- Remove `Java_Storage` and `Java_Column` use from everywhere which isn't `Column`.
- Reviewed and cleaned up usage of Storage by Enso code.
  • Loading branch information
jdunkerley authored Jan 22, 2025
1 parent 0034798 commit 3587ca2
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import Standard.Table.Internal.Vector_Builder.Vector_Builder
from Standard.Table import Aggregate_Column, Column, Value_Type
from Standard.Table.Aggregate_Column.Aggregate_Column import all
from Standard.Table.Errors import Inexact_Type_Coercion
from Standard.Table.Internal.Storage import get_storage_for_column

import Standard.Database.Connection.Connection.Connection
import Standard.Database.DB_Column.DB_Column
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import Standard.Table.Internal.Vector_Builder.Vector_Builder
from Standard.Table import Aggregate_Column, Column, Value_Type
from Standard.Table.Aggregate_Column.Aggregate_Column import all
from Standard.Table.Errors import Inexact_Type_Coercion
from Standard.Table.Internal.Storage import get_storage_for_column

import Standard.Database.Connection.Connection.Connection
import Standard.Database.DB_Column.DB_Column
Expand Down
81 changes: 30 additions & 51 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ type Column
case needs_polyglot_conversion of
True -> Java_Column.fromItems name (enso_to_java_maybe items) expected_storage_type java_problem_aggregator
False -> Java_Column.fromItemsNoDateConversion name items expected_storage_type java_problem_aggregator
result = Column.Value java_column . throw_on_warning Conversion_Failure
result = Column.from_java_column java_column . throw_on_warning Conversion_Failure
result.catch Conversion_Failure error->
if error.example_values.is_empty then result else
raise_invalid_value_type_error error.example_values.first
Expand All @@ -128,7 +128,14 @@ type Column
from_storage : Text -> Java_Storage -> Column
from_storage name storage =
Invalid_Column_Names.handle_java_exception <|
Column.Value (Java_Column.new name storage)
java_column = Java_Column.new name storage
Column.from_java_column java_column

## PRIVATE
Creates a new column given a Java Column object.

The Java column is wrapped directly with the `Column.Value` constructor;
no checks or conversions are performed on it here.

Arguments:
- java_column: The Java Column object to wrap.
from_java_column : Java_Column -> Column
from_java_column java_column =
Column.Value java_column

## PRIVATE
ADVANCED
Expand All @@ -143,7 +150,7 @@ type Column
Invalid_Column_Names.handle_java_exception <| Illegal_Argument.handle_java_exception <|
java_column = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
Java_Column.fromRepeatedItem name item repeats java_problem_aggregator
Column.Value java_column
Column.from_java_column java_column

## PRIVATE

Expand Down Expand Up @@ -828,7 +835,7 @@ type Column
rs = Panic.catch No_Such_Method handler=handle_no_iif <|
Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
s.iif true_val false_val storage_type java_problem_aggregator
Column.Value (Java_Column.new new_name rs)
Column.from_storage new_name rs

## PRIVATE

Expand Down Expand Up @@ -1197,7 +1204,7 @@ type Column
storage.fillMissingFrom other_storage storage_type java_problem_aggregator
_ ->
storage.fillMissing default storage_type java_problem_aggregator
Column.Value (Java_Column.new self.name new_storage)
Column.from_storage self.name new_storage

## ALIAS fill empty, if_empty
GROUP Standard.Base.Values
Expand Down Expand Up @@ -1846,7 +1853,7 @@ type Column
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
parse_problem_aggregator = ParseProblemAggregator.make java_problem_aggregator self.name type
parser.parseColumn storage parse_problem_aggregator
Column.Value (Java_Column.new self.name new_storage)
Column.from_storage self.name new_storage

## GROUP Standard.Base.Conversions
ICON convert
Expand Down Expand Up @@ -2175,7 +2182,7 @@ type Column
rename : Text -> Column ! Illegal_Argument
rename self name = naming_helper.ensure_name_is_valid name <|
Illegal_Argument.handle_java_exception <|
Column.Value (self.java_column.rename name)
Column.from_java_column (self.java_column.rename name)

## GROUP Standard.Base.Metadata
ICON metadata
Expand Down Expand Up @@ -2284,8 +2291,7 @@ type Column
valid_index = (index >= 0) && (index < self.length)
if valid_index.not then default else
storage = self.java_column.getStorage
if storage.isNothing index then Nothing else
java_to_enso <| storage.getItemBoxed index
java_to_enso <| storage.getItemBoxed index

## PRIVATE
ICON data_input
Expand Down Expand Up @@ -2428,7 +2434,7 @@ type Column
rule = OrderBuilder.OrderRule.new self.java_column order_bool missing_last
mask = OrderBuilder.buildOrderMask [rule]
new_col = self.java_column.applyMask mask
Column.Value new_col
Column.from_java_column new_col
_ ->
wrapped a b = case a of
Nothing -> if b.is_nothing then Ordering.Equal else if missing_last then Ordering.Greater else Ordering.Less
Expand Down Expand Up @@ -2493,7 +2499,7 @@ type Column
length = self.length
offset = (start.min length).max 0
limit = ((end - offset).min (length - offset)).max 0
Column.Value (self.java_column.slice offset limit)
Column.from_java_column (self.java_column.slice offset limit)

## GROUP Standard.Base.Selections
ICON parse3
Expand Down Expand Up @@ -2541,7 +2547,7 @@ type Column
reverse : Column
reverse self =
mask = OrderMask.reverse self.length
Column.Value (self.java_column.applyMask mask)
Column.from_java_column (self.java_column.applyMask mask)

## GROUP Standard.Base.Metadata
ICON metadata
Expand All @@ -2555,7 +2561,8 @@ type Column

example_duplicate_count = Examples.integer_column.duplicate_count
duplicate_count : Column
duplicate_count self = Column.Value self.java_column.duplicateCount
duplicate_count self =
Column.from_java_column self.java_column.duplicateCount

## PRIVATE
Provides a simplified text representation for display in the REPL and errors.
Expand Down Expand Up @@ -2647,7 +2654,7 @@ run_vectorized_many_op column name fallback_fn operands new_name=Nothing skip_nu
current.vectorizedOrFallbackZip name problem_builder fallback_fn operand.java_column.getStorage skip_nulls storage_type
_ -> Polyglot_Helpers.handle_polyglot_dataflow_errors <|
current.vectorizedOrFallbackBinaryMap name problem_builder fallback_fn operand skip_nulls storage_type
Column.Value (Java_Column.new effective_new_name folded)
Column.from_storage effective_new_name folded

## PRIVATE

Expand Down Expand Up @@ -2677,12 +2684,12 @@ run_vectorized_binary_op column name operand new_name=Nothing fallback_fn=Nothin
s2 = col2.getStorage
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s1.vectorizedOrFallbackZip name problem_builder fallback_fn s2 skip_nulls storage_type
Column.Value (Java_Column.new effective_new_name rs)
Column.from_storage effective_new_name rs
_ ->
s1 = column.java_column.getStorage
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s1.vectorizedOrFallbackBinaryMap name problem_builder fallback_fn (enso_to_java operand) skip_nulls storage_type
Column.Value (Java_Column.new effective_new_name rs)
Column.from_storage effective_new_name rs

## PRIVATE

Expand All @@ -2708,7 +2715,7 @@ run_vectorized_ternary_op column name operand0 operand1 new_name=Nothing expecte
s1 = column.java_column.getStorage
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s1.vectorizedTernaryMap name problem_builder operand0 operand1 skip_nulls storage_type
Column.Value (Java_Column.new effective_new_name rs)
Column.from_storage effective_new_name rs

## PRIVATE
Runs a binary operation over the provided column and operand which may be
Expand Down Expand Up @@ -2736,7 +2743,7 @@ run_binary_op column function operand new_name skip_nulls=True expected_result_t
_ ->
Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s.binaryMap function operand skip_nulls storage_type problem_builder
Column.Value (Java_Column.new new_name new_storage)
Column.from_storage new_name new_storage

## PRIVATE

Expand Down Expand Up @@ -2770,46 +2777,18 @@ run_vectorized_binary_op_with_fallback_problem_handling column name operand fall
s2 = col2.getStorage
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s1.vectorizedOrFallbackZip name problem_builder applied_fn s2 skip_nulls storage_type
Column.Value (Java_Column.new new_name rs)
Column.from_storage new_name rs
_ ->
s1 = column.java_column.getStorage
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s1.vectorizedOrFallbackBinaryMap name problem_builder applied_fn (enso_to_java operand) skip_nulls storage_type
Column.Value (Java_Column.new new_name rs)

## PRIVATE

Gets a textual representation of the item at position `ix` in `column`.

Arguments:
- column: The column to get the item from.
- ix: The index in the column from which to get the item.
get_item_as_text : Column -> Integer -> Text
get_item_as_text column ix =
item = column.getItemBoxed ix
## TODO This special handling of `Text` is because `"a".to_text` evaluates
to "'a'" and not just "a". The code can be simplified once the following
task is implemented:
https://www.pivotaltracker.com/story/show/181499256
case item of
_ : Text -> normalize_text_for_display item
_ -> item.pretty

## PRIVATE
Ensures that the text can be safely displayed in a terminal.

If the string contains special characters, it will be wrapped in quotes and
the characters escaped. Otherwise, the string is returned as-is.
normalize_text_for_display text =
prettified = text.pretty
just_quoted = "'" + text + "'"
if prettified == just_quoted then text else prettified
Column.from_storage new_name rs

## PRIVATE
A helper to create a new column consisting of slices of the original column.
slice_ranges column ranges =
normalized = normalize_ranges ranges
Column.Value (column.java_column.slice normalized)
Column.from_java_column (column.java_column.slice normalized)

## PRIVATE
Creates a storage builder suitable for building a column for the provided
Expand Down Expand Up @@ -2928,7 +2907,7 @@ apply_unary_operation column:Column operation:UnaryOperation new_name:Text|Nothi
used_name = new_name.if_nothing (naming_helper.function_name operation.getName [column])
Java_Problems.with_map_operation_problem_aggregator column.name Problem_Behavior.Report_Warning java_problem_aggregator->
java_column = UnaryOperation.apply column.java_column operation used_name java_problem_aggregator
if java_column.is_nothing then if_unsupported else Column.Value java_column
if java_column.is_nothing then if_unsupported else Column.from_java_column java_column

## PRIVATE
Applies a function to every row in the column.
Expand All @@ -2945,7 +2924,7 @@ apply_unary_map column:Column new_name:Text function expected_result_type:Value_
storage_type = resolve_storage_type expected_result_type
Java_Problems.with_map_operation_problem_aggregator column.name Problem_Behavior.Report_Warning java_problem_aggregator->
map_column = UnaryOperation.mapFunction column.java_column function nothing_unchanged storage_type new_name java_problem_aggregator
Column.Value map_column
Column.from_java_column map_column

## PRIVATE
Many_Files_List.from (that : Column) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ import project.Value_Type.Value_Type

polyglot java import java.lang.IllegalArgumentException
polyglot java import java.time.temporal.UnsupportedTemporalTypeException
polyglot java import org.enso.table.data.column.storage.Storage as Java_Storage
polyglot java import org.enso.table.data.table.Column as Java_Column

## PRIVATE
Create a formatter for the specified `Value_Type`.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from Standard.Base import all

import project.Table.Table
from project.Column import get_item_as_text, normalize_text_for_display

polyglot java import java.lang.System as Java_System

Expand All @@ -22,12 +21,11 @@ polyglot java import java.lang.System as Java_System
codes for rich formatting in the terminal.
display_table (table : Table) (add_row_index : Boolean) (max_rows_to_show : Integer) (all_rows_count : Integer) (format_terminal : Boolean) -> Text =
cols = Vector.from_polyglot_array table.java_table.getColumns
col_names = cols.map .getName . map normalize_text_for_display
col_names = cols.map .getName . map _normalize_text_for_display
col_vals = cols.map .getStorage
display_rows = table.row_count.min max_rows_to_show
rows = Vector.new display_rows row_num->
cols = col_vals.map col->
if col.isNothing row_num then "Nothing" else get_item_as_text col row_num
cols = col_vals.map col-> _get_item_as_text col row_num
if add_row_index then [row_num.to_text] + cols else cols
table_text = case add_row_index of
True -> print_table [""]+col_names rows 1 format_terminal
Expand Down Expand Up @@ -87,3 +85,23 @@ pad txt len =
ansi_bold : Boolean -> Text -> Text
ansi_bold enabled txt =
if enabled && (Java_System.console != Nothing) then '\e[1m' + txt + '\e[m' else txt

## PRIVATE
   Renders the value held at index `ix` of `storage` as display text.

   Arguments:
   - storage: The storage to read the value from.
   - ix: The index within the storage of the value to render.
private _get_item_as_text storage ix =
    # Text goes through `_normalize_text_for_display` so that quotes are only
    # shown when necessary; every other value uses its `pretty` form.
    case storage.getItemBoxed ix of
        text_value : Text -> _normalize_text_for_display text_value
        other_value -> other_value.pretty


## PRIVATE
   Makes a text value safe to display in a terminal.

   When the `pretty` form of the text is nothing more than the text wrapped
   in single quotes, the text is returned unchanged. Otherwise the `pretty`
   form (quoted, with special characters escaped) is returned.
private _normalize_text_for_display text =
    escaped = text.pretty
    needs_escaping = escaped != ("'" + text + "'")
    if needs_escaping then escaped else text
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ polyglot java import org.enso.table.data.column.builder.Builder
polyglot java import org.enso.table.data.column.builder.BuilderForBoolean
polyglot java import org.enso.table.data.column.builder.BuilderForDouble
polyglot java import org.enso.table.data.column.builder.BuilderForLong
polyglot java import org.enso.table.data.column.storage.Storage as Java_Storage
polyglot java import org.enso.table.problems.ProblemAggregator

## PRIVATE
Expand Down
10 changes: 4 additions & 6 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ polyglot java import java.util.UUID
polyglot java import org.enso.base.ObjectComparator
polyglot java import org.enso.table.data.index.MultiValueIndex
polyglot java import org.enso.table.data.mask.OrderMask
polyglot java import org.enso.table.data.table.Column as Java_Column
polyglot java import org.enso.table.data.table.join.conditions.Between as Java_Join_Between
polyglot java import org.enso.table.data.table.join.conditions.Equals as Java_Join_Equals
polyglot java import org.enso.table.data.table.join.conditions.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case
Expand Down Expand Up @@ -342,8 +341,7 @@ type Table
at : Integer | Text -> Column ! No_Such_Column | Index_Out_Of_Bounds
at self (selector:(Integer | Text)=0) = case selector of
_ : Integer ->
java_columns = Vector.from_polyglot_array self.java_table.getColumns
Column.Value (java_columns.at selector)
Column.from_java_column (self.java_table.getColumns.at selector)
_ -> self.get selector (Error.throw (No_Such_Column.Error selector))

## ICON select_column
Expand Down Expand Up @@ -386,7 +384,7 @@ type Table
java_column = case selector of
_ : Integer -> Vector.from_polyglot_array self.java_table.getColumns . get selector
_ : Text -> self.java_table.getColumnByName selector
if java_column.is_nothing then if_missing else Column.Value java_column
if java_column.is_nothing then if_missing else Column.from_java_column java_column

## ALIAS cell value, get cell
GROUP Standard.Base.Selections
Expand Down Expand Up @@ -1569,7 +1567,7 @@ type Table
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
parse_problem_aggregator = ParseProblemAggregator.make java_problem_aggregator column.name type
parser.parseColumn storage parse_problem_aggregator
Column.Value (Java_Column.new column.name new_storage)
Column.from_storage column.name new_storage
Table.new new_columns

## GROUP Standard.Base.Conversions
Expand Down Expand Up @@ -2599,7 +2597,7 @@ type Table
columns : Vector
columns self = Vector.from_polyglot_array <|
Array_Proxy.new self.java_table.getColumns.length i->
Column.Value (self.java_table.getColumns.at i)
Column.from_java_column (self.java_table.getColumns.at i)

## GROUP Standard.Base.Metadata
ICON metadata
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,5 @@ type Hyper_Table
Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
row_count = if max_rows == Rows_To_Read.All_Rows then Nothing else max_rows.rows
java_columns = HyperReader.readTable self.file.file.path self.schema self.table row_count java_problem_aggregator
enso_columns = java_columns.map c-> Column.from_storage c.getName c.getStorage
enso_columns = java_columns.map Column.from_java_column
Table.new enso_columns
2 changes: 1 addition & 1 deletion test/Exploratory_Benchmarks/src/Table/Enso_Callback.enso
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ type Primitive_Enso_Callback_Test
expected_storage_type = Storage.from_value_type_strict Value_Type.Integer
java_column = Java_Problems.with_problem_aggregator ..Report_Error java_problem_aggregator->
Java_Column.fromItemsNoDateConversion "result" mapped expected_storage_type java_problem_aggregator
Column.from_storage java_column.getName java_column.getStorage
Column.from_java_column java_column

enso_map_with_builder_2_calls_unboxed self =
n = self.int_column.length
Expand Down
2 changes: 1 addition & 1 deletion test/Exploratory_Benchmarks/src/Table/Helpers.enso
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ column_from_vector name items convert_polyglot_dates =
Java_Column.fromItems name items expected_storage_type java_problem_aggregator
False ->
Java_Column.fromItemsNoDateConversion name items expected_storage_type java_problem_aggregator
Column.from_storage java_column.getName java_column.getStorage
Column.from_java_column java_column

check_results results =
mapped = results.map x-> case x of
Expand Down

0 comments on commit 3587ca2

Please sign in to comment.