875a33806acea37f602d0ad20fb77cd42432bbb6,scripts/tokenize/tokenize_pad.py,,,#,6
Before Change
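# Assumed imports for this excerpt (not shown in the record); in lingpy these
# names are usually available as:
#   from lingpy import Wordlist
#   from lingpy.settings import rcParams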
rcParams["debug"]=True
wl = Wordlist("pad_data_qlc.qlc")
wl.tokenize("pad_orthography_profile")
wl.output("qlc", filename="tokenized-pad_data")
After Change
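# Note: this excerpt assumes the surrounding script has already defined
#   t      -- the orthography-profile tokenizer whose grapheme_clusters() and
#             rules() methods are called below
#   infile -- the tab-delimited QLC wordlist file, opened for reading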
print()
print("ID" + "\t" + "ORIGINAL" + "\t" + "RULES")
for line in infile:
    line = line.strip()
    tokens = line.split("\t")
    id = tokens[0]
    counterpart = tokens[2]
    grapheme_clusters = t.grapheme_clusters(counterpart)
    rules = t.rules(grapheme_clusters)
    print(id + "\t" + counterpart + "\t" + rules)
# This tokenize does not work because of the way the orthography rules are currently written, i.e.
# they expect space-delimited tokens; the wordlist.tokenize() function first applies the rules
# and then the Unicode grapheme cluster tokenization.
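A minimal sketch of the ordering issue described in the comment above, reusing only the tokenizer methods already called in this script (the input word is hypothetical):

# Order used in the loop above, which works: segment into Unicode grapheme
# clusters first, then apply the orthography rules to the segmented string.
grapheme_clusters = t.grapheme_clusters("example")   # hypothetical input word
segmented_then_rules = t.rules(grapheme_clusters)

# Order wordlist.tokenize() currently uses, which fails: the rules are applied
# to the raw, unsegmented string, but they expect space-delimited tokens, so
# they do not match before grapheme cluster tokenization has run.
rules_first = t.rules("example")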
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances
Project Name: lingpy/lingpy
Commit Name: 875a33806acea37f602d0ad20fb77cd42432bbb6
Time: 2013-11-08
Author: bambooforest@gmail.com
File Name: scripts/tokenize/tokenize_pad.py
Class Name:
Method Name:
Project Name: gooofy/zamia-speech
Commit Name: 9f376975884e7a0d7a553dcdfa1ab54b66ddbb1f
Time: 2018-12-10
Author: guenter@zamia.org
File Name: speech_editor.py
Class Name:
Method Name:
Project Name: daniel-kukiela/nmt-chatbot
Commit Name: 0e2a7f6d85a341959eba41d65019b2566084b406
Time: 2017-12-03
Author: daniel@kukiela.pl
File Name: inference.py
Class Name:
Method Name: