f70e71d5c7fdc8e25391e54e74c3402fb323ad5c,examples/plot_employee_salaries.py,,,#,45
Before Change
// the rest will have a standard encoding
data_path = fetching.get_data_dir()
fetching.fetch_employee_salaries()
data_file = os.path.join(data_path, "employee_salaries", "rows.csv")
df = pd.read_csv(data_file).astype(str)
df["Current Annual Salary"] = [float(s[1:]) for s
in df["Current Annual Salary"]]
df["Year First Hired"] = [int(s.split("/")[-1])
for s in df["Date First Hired"]]
target_column = "Current Annual Salary"
y = df[target_column].values.ravel()
After Change
// the other column are supposed clean, so it is "safe" to use
// one hot encoding to transform them
clean_columns = {
"Gender": "one-hot",
"Department Name": "one-hot",
"Assignment Category": "one-hot",
"Year First Hired": "num"}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// We then choose which categorical encoding methods to benchmark:
encoding_methods = ["one-hot", "target", "similarity"]
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances
Project Name: dirty-cat/dirty_cat
Commit Name: f70e71d5c7fdc8e25391e54e74c3402fb323ad5c
Time: 2018-06-06
Author: pierreglaser@msn.com
File Name: examples/plot_employee_salaries.py
Class Name:
Method Name:
Project Name: Pinafore/qb
Commit Name: e274abd78ec052dadae737b83a50531f0f8d7666
Time: 2018-02-02
Author: sjtufs@gmail.com
File Name: qanta/guesser/tied.py
Class Name:
Method Name:
Project Name: GoogleCloudPlatform/python-docs-samples
Commit Name: 8c18cecf15a8935d8bf712edcc91ac05daf2176e
Time: 2020-06-16
Author: tmatsuo@google.com
File Name: appengine/standard/noxfile-template.py
Class Name:
Method Name: