{"id":32248,"date":"2025-01-14T14:27:47","date_gmt":"2025-01-14T13:27:47","guid":{"rendered":"https:\/\/www.graviton.at\/letterswaplibrary\/looking-for-ad-detection-in-text-datasets\/"},"modified":"2025-01-14T14:27:47","modified_gmt":"2025-01-14T13:27:47","slug":"looking-for-ad-detection-in-text-datasets","status":"publish","type":"post","link":"https:\/\/www.graviton.at\/letterswaplibrary\/looking-for-ad-detection-in-text-datasets\/","title":{"rendered":"Looking For Ad Detection In Text Datasets"},"content":{"rendered":"<p><!-- SC_OFF --><\/p>\n<div class=\"md\">\n<p>I have a bunch of audio and video files which have ads in them. My plan was to get transcripts of these files (maybe using whisper but not confirmed yet) and then detect which timestamps have ads on them. Anyone know any datasets that could help with this?<\/p>\n<\/div>\n<p><!-- SC_ON -->   submitted by   <a href=\"https:\/\/www.reddit.com\/user\/Captainphilipp21\"> \/u\/Captainphilipp21 <\/a> <br \/> <span><a href=\"https:\/\/www.reddit.com\/r\/datasets\/comments\/1i15kas\/looking_for_ad_detection_in_text_datasets\/\">[link]<\/a><\/span>   <span><a href=\"https:\/\/www.reddit.com\/r\/datasets\/comments\/1i15kas\/looking_for_ad_detection_in_text_datasets\/\">[comments]<\/a><\/span><\/p><div class='watch-action'><div class='watch-position align-right'><div class='action-like'><a class='lbg-style1 like-32248 jlk' href='javascript:void(0)' data-task='like' data-post_id='32248' data-nonce='614a020375' rel='nofollow'><img class='wti-pixel' src='https:\/\/www.graviton.at\/letterswaplibrary\/wp-content\/plugins\/wti-like-post\/images\/pixel.gif' title='Like' \/><span class='lc-32248 lc'>0<\/span><\/a><\/div><\/div> <div class='status-32248 status align-right'><\/div><\/div><div class='wti-clear'><\/div>","protected":false},"excerpt":{"rendered":"<p>I have a bunch of audio and video files which have ads in them. My plan was&#8230;<\/p>\n","protected":false},"author":0,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[85],"tags":[],"class_list":["post-32248","post","type-post","status-publish","format-standard","hentry","category-datatards","wpcat-85-id"],"_links":{"self":[{"href":"https:\/\/www.graviton.at\/letterswaplibrary\/wp-json\/wp\/v2\/posts\/32248","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.graviton.at\/letterswaplibrary\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.graviton.at\/letterswaplibrary\/wp-json\/wp\/v2\/types\/post"}],"replies":[{"embeddable":true,"href":"https:\/\/www.graviton.at\/letterswaplibrary\/wp-json\/wp\/v2\/comments?post=32248"}],"version-history":[{"count":0,"href":"https:\/\/www.graviton.at\/letterswaplibrary\/wp-json\/wp\/v2\/posts\/32248\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.graviton.at\/letterswaplibrary\/wp-json\/wp\/v2\/media?parent=32248"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.graviton.at\/letterswaplibrary\/wp-json\/wp\/v2\/categories?post=32248"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.graviton.at\/letterswaplibrary\/wp-json\/wp\/v2\/tags?post=32248"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}