
Before Change


    continuing = args.continue_from != None

    for i in range(args.size):
        url = url_fetcher.next()[0]

        if continuing:
            if url != args.continue_from:
                continue
            else:
                continuing = False

        if not url:
            cp.print_warning("there are no more urls to process")

        cp.print_progress(i + 1, " / ", args.size, " - ", url)

        try:
            video = PyTube(utils.get_youtube_url(url))
        except Exception as exception:
            cp.print_error("failed to generate PyTube representation for vidoe ", url)
            continue

        if int(video.length) > args.video_length:
            continue

        caption = video.captions.get_by_language_code("en")
        if not caption:
            cp.print_warning("no caption available for video - ", url)
            continue

        try:
            srt_captions = caption.generate_srt_captions().split("\n\n")
        except Exception as exception:
            cp.print_error("failed to retrieve for vidoe - ", url)
            continue

        translator = str.maketrans("", "", string.punctuation)  # to remove punctuation
        srt_tag_re = re.compile(r"<.*?>|\(.*?\)|\[.*?\]")

        keyword_exist = False
        for captions in srt_captions:
            if keyword in captions or plural.plural(keyword) in captions:
                keyword_exist = True
                break

        if not keyword_exist:
            cp.print_warning("keywords never appear in the video - ", url)
            continue

        try:
            crawler = YoutubeCrawler(url)
            audio_data = crawler.get_audio()
        except Exception as exception:
            cp.print_warning(exception)
            continue

        collected_data = []
        video_cc_count = 0
        video_audio_count = 0

        for captions in srt_captions:
            cc_split = captions.split("\n")
            if len(cc_split) == 4 and cc_split[0] == "":
                cc_split = (cc_split[1], cc_split[2], cc_split[3])
            elif len(cc_split) != 3:
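
As context for the caption handling shared by both versions, here is a minimal, self-contained sketch of splitting an SRT dump into index / timestamp / text blocks and stripping tags and punctuation. The sample string is illustrative only; in the script above the text comes from caption.generate_srt_captions().

    import re
    import string

    # Illustrative SRT fragment, not taken from the repository.
    sample_srt = (
        "1\n00:00:01,000 --> 00:00:03,500\nhello <i>world</i>\n\n"
        "2\n00:00:04,000 --> 00:00:06,000\n[Music] second caption block"
    )

    translator = str.maketrans("", "", string.punctuation)  # to remove punctuation
    srt_tag_re = re.compile(r"<.*?>|\(.*?\)|\[.*?\]")        # to remove tags/annotations

    for block in sample_srt.split("\n\n"):
        cc_split = block.split("\n")
        if len(cc_split) != 3:
            continue
        index, timestamp, text = cc_split
        clean_text = srt_tag_re.sub("", text).translate(translator).strip()
        print(index, timestamp, clean_text)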

After Change


    url_set = set()

    for i in range(args.size):
        url = url_fetcher.next()

        if not url:
            cp.print_warning("there are no more urls to process")
            break

        url = url[0]

        if continuing:
            if url != args.continue_from:
                continue
            else:
                continuing = False

        cp.print_progress(i + 1, " / ", args.size, " - ", url)

        if url in url_set:
            cp.print_warning("video is already processed", url)
            continue

        url_set.add(url)

        try:
            video = PyTube(utils.get_youtube_url(url))
        except Exception as exception:
            cp.print_error("failed to generate PyTube representation for vidoe ", url)
            continue

        if int(video.length) > args.video_length:
            continue

        caption = video.captions.get_by_language_code("en")
        if not caption:
            cp.print_warning("no caption available for video - ", url)
            continue

        try:
            srt_captions = caption.generate_srt_captions().split("\n\n")
        except Exception as exception:
            cp.print_error("failed to retrieve for vidoe - ", url)
            continue

        translator = str.maketrans("", "", string.punctuation)  # to remove punctuation
        srt_tag_re = re.compile(r"<.*?>|\(.*?\)|\[.*?\]")

        keyword_exist = False
        for captions in srt_captions:
            if keyword in captions or plural.plural(keyword) in captions:
                keyword_exist = True
                break

        if not keyword_exist:
            cp.print_warning("keywords never appear in the video - ", url)
            continue

        try:
            crawler = YoutubeCrawler(url)
            audio_data = crawler.get_audio()
        except Exception as exception:
            cp.print_warning(exception)
            continue

        collected_data = []
        video_cc_count = 0
        video_audio_count = 0

        for captions in srt_captions:
            cc_split = captions.split("\n")
            if len(cc_split) == 4 and cc_split[0] == "":
                cc_split = (cc_split[1], cc_split[2], cc_split[3])
            elif len(cc_split) != 3:
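
The substance of the change is at the top of the loop: fetch first, check for an exhausted fetcher before indexing into the result, and skip URLs that were already processed. A minimal sketch of that pattern, assuming a hypothetical fetch_next() standing in for url_fetcher.next(), which may return None when there is nothing left:

    def process_urls(fetch_next, size):
        url_set = set()
        for i in range(size):
            item = fetch_next()        # may be None when the fetcher is exhausted
            if not item:
                print("there are no more urls to process")
                break                  # stop instead of indexing into None
            url = item[0]              # safe to index only after the check
            if url in url_set:
                print("video is already processed", url)
                continue               # skip duplicates
            url_set.add(url)
            # ... per-URL processing (PyTube lookup, captions, audio) goes here ...

Compared with the original url = url_fetcher.next()[0], checking before indexing avoids a TypeError when the fetcher is exhausted, and the "no more urls" warning now actually terminates the loop instead of falling through to the rest of the iteration.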
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 2

Instances


Project Name: castorini/honk
Commit Name: 21a70686e7023743c5cae3fe6660c8782b8300fc
Time: 2019-02-01
Author: ljj7975@gmail.com
File Name: keyword_spotting_data_generator/evaluation/evaluation_data_generator.py
Class Name:
Method Name: main


Project Name: castorini/honk
Commit Name: 26e1bc63f306324e6689f30f0d33f5859a00a6ce
Time: 2020-02-23
Author: ljj7975@gmail.com
File Name: keyword_spotting_data_generator/keyword_data_generator.py
Class Name:
Method Name: generate_dataset


Project Name: acoular/acoular
Commit Name: 3b01007e668a7902b467978be69b1dd1f19108a9
Time: 2020-03-31
Author: kujawski.ad@gmail.com
File Name: acoular/tbeamform.py
Class Name: BeamformerCleantTraj
Method Name: result


Project Name: raghakot/keras-vis
Commit Name: 7a37bd0716fcb11047a2ae9d7ee32c6d75a48efc
Time: 2017-07-16
Author: ragha@outlook.com
File Name: vis/backend/tensorflow_backend.py
Class Name:
Method Name: modify_model_backprop