f70e71d5c7fdc8e25391e54e74c3402fb323ad5c,examples/plot_employee_salaries.py,,,#,45
Before Change
fetching.fetch_employee_salaries()
data_file = os.path.join(data_path, "employee_salaries", "rows.csv")
df = pd.read_csv(data_file).astype(str)
df["Current Annual Salary"] = [float(s[1:] ) for s
in df["Current Annual Salary"]]
df["Year First Hired"] = [int(s.split("/")[-1])
for s in df["Date First Hired"]]
After Change
// the other column are supposed clean, so it is "safe" to use
// one hot encoding to transform them
clean_columns = {
"Gender": "one-hot",
"Department Name": "one-hot",
"Assignment Category": "one-hot",
"Year First Hired": "num"}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// We then choose which categorical encoding methods to benchmark:
encoding_methods = ["one-hot", "target", "similarity"]
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 3
Instances Project Name: dirty-cat/dirty_cat
Commit Name: f70e71d5c7fdc8e25391e54e74c3402fb323ad5c
Time: 2018-06-06
Author: pierreglaser@msn.com
File Name: examples/plot_employee_salaries.py
Class Name:
Method Name:
Project Name: nilmtk/nilmtk
Commit Name: 4a87f3b65125fb105168e4ff3f9aff3edfb9ab58
Time: 2014-05-21
Author: jack-list@xlk.org.uk
File Name: nilmtk/tests/generate_test_data.py
Class Name:
Method Name: create_energy_hdf5
Project Name: gooofy/zamia-speech
Commit Name: 85861ea5c5320518267405bef050a0dab933e069
Time: 2018-02-17
Author: guenter@zamia.org
File Name: phone_gen.py
Class Name:
Method Name:
Project Name: AlexsLemonade/refinebio
Commit Name: c8933a93b75c8c0208848fe2d50d2ce3f1ee04d4
Time: 2020-11-04
Author: samuel.tate@outlook.com
File Name: api/data_refinery_api/exceptions.py
Class Name:
Method Name: custom_exception_handler