4b21300999e11ba6f91952c05a936ccec0673e2e,nltk/tokenize/treebank.py,TreebankWordTokenizer,span_tokenize,#TreebankWordTokenizer#Any#,147
Before Change
ix = 0
spans = []
for word_token in self.tokenize(text):
if word_token in ("``", """"):
orig_idx = text.find(word_token, ix)
quote_idx = text.find(""", ix)
if orig_idx < 0:
real_token = """
elif quote_idx < 0:
real_token = word_token
elif orig_idx < quote_idx:
real_token = word_token
else:
real_token = """
else:
real_token = word_token
ix = text.find(real_token, ix)
end = ix + len(real_token)
spans.append((ix, end))
ix = end
return spans
class TreebankWordDetokenizer(TokenizerI):
After Change
// treated as starting quotes).
if (""" in text) or ("""" in text):
// Find double quotes and converted quotes
matched = [m.group() for m in re.finditer(r"``|"{2}|\"", text)]
// Replace converted quotes back to double quotes
tokens = [matched.pop(0) if tok in [""", "``", """"] else tok for tok in raw_tokens]
else:
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances Project Name: nltk/nltk
Commit Name: 4b21300999e11ba6f91952c05a936ccec0673e2e
Time: 2017-11-29
Author: lyyb46@gmail.com
File Name: nltk/tokenize/treebank.py
Class Name: TreebankWordTokenizer
Method Name: span_tokenize
Project Name: hellohaptik/chatbot_ner
Commit Name: e870ef14c590502fb0dc5ff3199e2602a87ec008
Time: 2019-03-18
Author: jain.chirag925@gmail.com
File Name: ner_v1/detectors/numeral/budget/budget_detection.py
Class Name: BudgetDetector
Method Name: _detect_max_budget
Project Name: hellohaptik/chatbot_ner
Commit Name: e870ef14c590502fb0dc5ff3199e2602a87ec008
Time: 2019-03-18
Author: jain.chirag925@gmail.com
File Name: ner_v1/detectors/numeral/budget/budget_detection.py
Class Name: BudgetDetector
Method Name: _detect_min_budget