Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
-- Two Million Dataset
--
-- Builds the table `infringing_data` from the newline-delimited JSON dumps
-- matching data/posts_*.jsonl, keeping only rows whose `author` equals the
-- target DID, and adds a derived `post_url` column pointing at the post on
-- bsky.app. The URLs are then exported to data/infringing_data.csv.
--
-- Replace the '<did>' placeholder in the WHERE clause with the DID to filter on.
CREATE OR REPLACE TABLE infringing_data AS
WITH posts AS (
    -- The JSON schema is not declared in this script, so every column that
    -- read_json infers is carried through unchanged.
    SELECT *
    FROM read_json('data/posts_*.jsonl', format = 'newline_delimited')
)
SELECT
    *,
    (
        -- The profile segment is taken from `author`, which the WHERE clause
        -- pins to the target DID, so the DID only has to be written once.
        'https://bsky.app/profile/' || author || '/post/'
        -- Capture group 1 is the trailing path segment of the AT URI
        -- (presumably the post's record key). Dots are escaped so they match
        -- literally, and [^/]+ keeps the authority part from spanning slashes.
        -- NOTE: regexp_extract yields '' when the pattern does not match, in
        -- which case post_url ends in '/post/'.
        || regexp_extract(uri, 'at://[^/]+/app\.bsky\.feed\.post/([a-z0-9]+)', 1)
    ) AS post_url
FROM posts
WHERE author = '<did>';

-- Export only the derived URLs; the output format is inferred from the
-- .csv file extension.
COPY (
    SELECT post_url
    FROM infringing_data
) TO 'data/infringing_data.csv';
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement