5b48f9a9c097d26d395873044ceaa1a0b886682a,solutionbox/code_free_ml/mltoolbox/code_free_ml/analyze.py,,run_local_analysis,#Any#Any#Any#Any#,291
Before Change
parsed_line = dict(zip(header, line))
num_examples += 1
for col_schema in schema:
col_name = col_schema["name" ]
col_type = col_schema["type" ].lower()
transform = features[col_name]["transform" ]
if transform == constant.TARGET_TRANSFORM:
if col_type == constant.STRING_SCHEMA:
transform = constant.ONE_HOT_TRANSFORM
elif col_type in constant.NUMERIC_SCHEMA:
transform = constant.IDENTITY_TRANSFORM
else :
raise ValueError ("Unknown schema type" )
if transform in constant.TEXT_TRANSFORMS:
split_strings = parsed_line[col_name].split(" " )
for one_label in set (split_strings) :
if one_label:
vocabs[col_name][one_label] + = 1
elif transform in constant.CATEGORICAL_TRANSFORMS:
if parsed_line[col_name]:
vocabs[col_name][parsed_line[col_name]] += 1
elif transform in constant.NUMERIC_TRANSFORMS:
if not parsed_line[col_name].strip():
continue
numerical_results[col_name]["min" ] = (
min(numerical_results[col_name]["min" ],
float (parsed_line[col_name])))
numerical_results[col_name]["max" ] = (
max(numerical_results[col_name]["max" ],
float (parsed_line[col_name])))
numerical_results[col_name]["count" ] += 1
numerical_results[col_name]["sum" ] += float (parsed_line[col_name])
elif transform == constant.IMAGE_TRANSFORM:
pass
elif transform == constant.KEY_TRANSFORM:
pass
else :
raise ValueError ("Unknown transform %s" % transform)
vocab_sizes = {}
for name, label_count in six.iteritems(vocabs):
After Change
inverted_features_target = copy.deepcopy(inverted_features)
for name, transform_set in six.iteritems(inverted_features_target):
if transform_set == set([constant.TARGET_TRANSFORM]):
target_schema = next(col["type" ].lower() for col in schema if col["name" ] == name)
if target_schema in constant.NUMERIC_SCHEMA:
inverted_features_target[name] = {constant.IDENTITY_TRANSFORM}
else :
inverted_features_target[name] = {constant.ONE_HOT_TRANSFORM}
def _init_numerical_results () :
return {"min" : float ("inf" ),
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 11
Instances
Project Name: googledatalab/pydatalab
Commit Name: 5b48f9a9c097d26d395873044ceaa1a0b886682a
Time: 2017-06-14
Author: brandondutra@google.com
File Name: solutionbox/code_free_ml/mltoolbox/code_free_ml/analyze.py
Class Name:
Method Name: run_local_analysis
Project Name: googledatalab/pydatalab
Commit Name: 5b48f9a9c097d26d395873044ceaa1a0b886682a
Time: 2017-06-14
Author: brandondutra@google.com
File Name: solutionbox/code_free_ml/mltoolbox/code_free_ml/analyze.py
Class Name:
Method Name: run_cloud_analysis
Project Name: uber/ludwig
Commit Name: 3e2f276459f976054b5c2ab8c55be994170345da
Time: 2020-08-27
Author: carlo.grisetti@dsgroup.it
File Name: ludwig/utils/defaults.py
Class Name:
Method Name: merge_with_defaults