71432d1890209628e189edf04d7d623160943718,autokeras/hypermodel/preprocessor.py,TextToNgramVector,transform,#TextToNgramVector#Any#Any#,182
Before Change
sentence = nest.flatten(x)[0].numpy().decode("utf-8")
data = self.vectorizer.transform([sentence]).toarray()
if self.selector:
data = self.selector.transform(data).astype("float32")
return data[0]
def output_types(self):
return (tf.float32,)
After Change
// Calculate tf at doc level
tf = np.zeros(len(self.vocabulary), dtype=int)
x = nest.flatten(x)[0].numpy().decode("utf-8")
token_pattern = re.compile(r"(?u)\b\w\w+\b")
tokens = self._word_ngram(token_pattern.findall(x.lower()))
for feature in tokens:
if feature in self.vocabulary:
feature_idx = self.vocabulary[feature]
tf[feature_idx] += 1
result = tf * self.k_best_idf_values
result = normalize([result], norm=self.norm, copy=False)[0]
return result
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances
Project Name: keras-team/autokeras
Commit Name: 71432d1890209628e189edf04d7d623160943718
Time: 2019-12-18
Author: 33369174+Davidsirui@users.noreply.github.com
File Name: autokeras/hypermodel/preprocessor.py
Class Name: TextToNgramVector
Method Name: transform
Project Name: deepmipt/DeepPavlov
Commit Name: c672738b39c8f739551da0d88991a2fedfba3e14
Time: 2018-10-04
Author: puleon@mail.ru
File Name: deeppavlov/models/ranking/metrics.py
Class Name:
Method Name: recall_at_k
Project Name: deepmipt/DeepPavlov
Commit Name: c672738b39c8f739551da0d88991a2fedfba3e14
Time: 2018-10-04
Author: puleon@mail.ru
File Name: deeppavlov/models/ranking/metrics.py
Class Name:
Method Name: rank_response