95b8e2a603f66cc3091e3266a717c0f206be3e95,FeatureSelection.py,,,#,34

Before Change



//file read (needs to be removed in final version)
test_filename = "test.csv"
train_filename = "train.csv"

train_news = pd.read_csv(train_filename)
test_news = pd.read_csv(test_filename)


//we will start with a simple bag-of-words technique
//creating the feature vector - a document-term matrix
countV = CountVectorizer()
train_count = countV.fit_transform(train_news["Statement"])

//inspect the training document-term matrix
//the attribute below reports its shape, which is (10240, 12196)
train_count.shape

//check vocabulary using below command
print(countV.vocabulary_)

//get feature names
print(countV.get_feature_names()[:25])

//


//tf-idf 
tfidfV = TfidfTransformer()
train_tfidf = tfidfV.fit_transform(train_count)
train_tfidf.shape

//print the first 10 rows of the training tf-idf matrix as a dense array
print(train_tfidf.A[:10])

After Change


//we will start with a simple bag-of-words technique
//creating the feature vector - a document-term matrix
countV = CountVectorizer()
train_count = countV.fit_transform(DataPrep.train_news["Statement"].values)

print(countV)
print(train_count)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 9

Instances


Project Name: nishitpatel01/Fake_News_Detection
Commit Name: 95b8e2a603f66cc3091e3266a717c0f206be3e95
Time: 2017-12-03
Author: nkp3@illinois.edu
File Name: FeatureSelection.py
Class Name:
Method Name:


Project Name: nishitpatel01/Fake_News_Detection
Commit Name: 99973b2eb7ee37508b81154154652ab282ea061d
Time: 2017-12-10
Author: nkp3@illinois.edu
File Name: classifier.py
Class Name:
Method Name:


Project Name: nishitpatel01/Fake_News_Detection
Commit Name: 3b49ffd98696ad697cf2b9685e581459d51ea0b1
Time: 2017-12-03
Author: nkp3@illinois.edu
File Name: FeatureSelection.py
Class Name:
Method Name: