rplantiko

Map new ↦ old Twitter archive

Jan 3rd, 2021 (edited)
1,023
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. // In 2019 or 2020, the structure of Twitter archives was changed dramatically.
  2. // In the new version, all media resources (.jpg and .png images, and .mp4 videos) are archived as well.
  3. // This increases the storage demand (my archive now needs 1.8 GB, 95% of which is used for media)
  4. // and makes the archive unfit for online usage.
  5. // Also, the data model for the tweets and the storage layout have been changed.
  6.  
  7. // For more details, see this document (German language)
  8. // https://docs.google.com/document/d/1_sfWUF5AJ43007tQ07sZu9pnUMYHuzIkX0nr-vleuWc/edit?usp=sharing
  9.  
  10. // This Node.js script maps the new format into the older format.
  11. // - It reads the file tweet.js, which contains the text and metadata of all tweets
  12. // - It generates one partition per month, named and formatted as required by the old Twitter archive
  13. // - Data for retweeted users is extracted from the list of user mentions
  14.  
  // Wrapper object of the new archive format. It is created before the tweet
  // file is loaded; readAllTweets() later reads window.YTD.tweet.part0 from it.
  global.window = { YTD: { tweet: {} } };

  const fs = require('fs');

  // Directory prefix for every generated file (note the trailing slash).
  const DIR = "test/";

  // Left-hand side prefix of the assignment written into each monthly file;
  // the "<year>_<month>" suffix is appended by writeToFile().
  const TWEET_NAME_PREFIX = "Grailbird.data.tweets_";

  // Author record used by map_tweet() for tweets that carry no `user` property.
  const MYSELF = {
    id: 192979771,
    id_str: "192979771",
    name: "Rüdiger Plantiko",
    profile_image_url_https:
      "https://pbs.twimg.com/profile_images/1178630979/plantiko_normal.jpg",
    protected: false,
    screen_name: "rplantiko",
    verified: false
  };
  28.  
  // Main sequence of the script.
  // tweet_index collects one entry per monthly file; writeToFile() appends to
  // it and writeIndex() sorts and serializes it at the end.
  const tweet_index = [];
  const tweets = readAllTweets();
  writeByMonth(tweets);
  writeIndex();
  33.  
  // Loads the tweet records of the new-format archive.
  //
  // ../tweet.js is required only for its side effect; afterwards the records
  // are taken from window.YTD.tweet.part0. That array is sorted in place,
  // ascending by tweet ID (see byID), and returned.
  function readAllTweets() {
    require('../tweet.js');
    const records = window.YTD.tweet.part0;
    records.sort(byID);
    return records;
  }
  38.  
  39.  
  // Groups consecutive tweets that share the same year and month (taken from
  // `created_at`, evaluated in local time) and hands every group to
  // writeToFile(year, month, mappedTweets).
  //
  // tweets: array of records of the form { tweet: {...} }. Only consecutive
  //         records are grouped, so the input is expected to be ordered
  //         chronologically (readAllTweets() sorts by ID).
  //
  // `month` is passed on zero-based, exactly as Date#getMonth returns it.
  // Nothing is written for an empty input.
  function writeByMonth(tweets) {
    let year = null;
    let month = null;
    // Local accumulator; it must not leak into the global scope.
    let batch = [];

    for (const { tweet } of tweets) {
      const created = new Date(tweet.created_at);
      const y = created.getFullYear();
      const m = created.getMonth();

      if (y !== year || m !== month) {
        // A new month starts: flush what has been collected so far.
        // The batch is empty only before the very first tweet.
        if (batch.length > 0) {
          writeToFile(year, month, batch);
        }
        year = y;
        month = m;
        batch = [];
      }
      batch.push(map_tweet(tweet));
    }

    // Flush the last month as well.
    if (batch.length > 0) {
      writeToFile(year, month, batch);
    }
  }
  58.  
  // Sorts tweet_index in place, newest month first (descending by year, then
  // by month), and writes it to DIR + "tweet_index.js" as the JavaScript
  // statement `var tweet_index = [...]`, pretty-printed with 2-space indent.
  //
  // The write is asynchronous. A failed write is rethrown from the callback,
  // matching writeToFile(), instead of being silently ignored.
  function writeIndex() {
    tweet_index.sort((a, b) => (b.year - a.year) || (b.month - a.month));

    const content = "var tweet_index = " + JSON.stringify(tweet_index, null, 2);
    fs.writeFile(DIR + "tweet_index.js", content, 'utf8', (err) => {
      if (err) throw err;
    });
  }
  65.  
  // Converts one tweet of the new archive format into the old format.
  //
  // t: the tweet object (the value of the `tweet` property of an archive record).
  // Returns a new object with source, id, id_str, created_at, text (taken from
  // t.full_text), user (t.user, or MYSELF when the tweet has none) and entities
  // (the same object as t.entities, not a copy).
  //
  // Simple solution for retweets: if the text starts with "RT @user:...", the
  // user is looked up in entities.user_mentions by screen name. When found,
  // that mention object becomes the tweet's user. Tweets without text,
  // entities or user_mentions are passed through without the retweet lookup.
  function map_tweet(t) {
    const mapped = {
      source: t.source,
      id: t.id,
      id_str: t.id_str,
      created_at: t.created_at,
      text: t.full_text,
      user: t.user || MYSELF,
      entities: t.entities
    };

    const retweet = typeof mapped.text === 'string'
      ? mapped.text.match(/^RT @([^:]*):/)
      : null;
    if (!retweet) {
      return mapped;
    }

    const mentions = mapped.entities?.user_mentions;
    const author = Array.isArray(mentions)
      ? mentions.find((u) => u.screen_name === retweet[1])
      : undefined;
    if (author) {
      // In the new format the indices are given as strings; coerce them to
      // numbers. This updates the mention object inside `entities` too,
      // because it is the same object.
      if (Array.isArray(author.indices)) {
        author.indices = author.indices.map(Number);
      }
      mapped.user = author;
    }

    return mapped;
  }
  94.  
  // Writes the tweets of one month to DIR + "<year>_<MM>.js" and records the
  // file in tweet_index.
  //
  // year:  full year, e.g. 2019
  // month: zero-based month (0 = January), as returned by Date#getMonth
  // tt:    array of already mapped tweets for that month
  //
  // The file content is `<TWEET_NAME_PREFIX><year>_<MM> = ` followed by a
  // newline and the JSON of `tt`. The write is asynchronous; a failed write
  // is rethrown from the callback. The index entry is appended synchronously.
  //
  // An empty list is skipped entirely: no file, no index entry. This also
  // covers the initial dummy flush of writeByMonth() without discarding
  // tweets from early years.
  function writeToFile( year, month, tt ) {
    if (tt.length === 0) return;

    const monthString = `${year}_${String(month + 1).padStart(2, '0')}`;
    const json = TWEET_NAME_PREFIX + monthString + " = \n" + JSON.stringify(tt);
    fs.writeFile(DIR + monthString + '.js', json, 'utf8', (err) => {
      if (err) throw err;
    });

    tweet_index.push({
      // Plain forward slashes: the "\/" seen in original archives is only a
      // JSON escape for "/", so the value itself must not contain backslashes.
      file_name: "data/js/tweets/" + monthString + ".js",
      var_name: "tweets_" + monthString,
      year: year,
      month: month + 1,
      tweet_count: tt.length
    });
  }
  111.  
  // Comparator for Array#sort: orders archive records ascending by tweet ID.
  // The IDs are converted to BigInt before comparing, so values beyond
  // Number.MAX_SAFE_INTEGER are compared exactly.
  // Returns -1, 0 or 1.
  function byID(a, b) {
    const left = BigInt(a.tweet.id);
    const right = BigInt(b.tweet.id);
    if (left < right) {
      return -1;
    }
    if (left > right) {
      return 1;
    }
    return 0;
  }
Add Comment
Please, Sign In to add comment