# Untitled

import math

import pandas as pd
from pandas.io.json import json_normalize

# Sample records. Each dict pairs a paper (``_id``, ``paper_title``) with one
# ``reference`` string; a paper with several references appears once per
# reference.
d = [
    {
        '_id': 'Y100',
        'paper_title': 'translation using information on dialogue participants',
        'reference': 'beattie, gs (2005, november) social causes of depression retrieved may 31, 2017, from',
    },
    {
        '_id': 'Y100',
        'paper_title': 'translation using information on dialogue participants',
        'reference': 'burton, n (2012, june 5) depressive realism retrieved may 31, 2017, from',
    },
    {
        '_id': 'Y100',
        'paper_title': 'translation using information on dialogue participants',
        'reference': 'gotlib, i h, 27 hammen, c l (1992) #supportthecause: identifying motivations to participate in online health campaigns new york: wiley',
    },
    {
        '_id': 'Y101',
        'paper_title': '#emotional tweets',
        'reference': 'paul ekman 1992 an argument for basic emotions cognition and emotion, 6(3):169200',
    },
    {
        '_id': 'Y101',
        'paper_title': '#emotional tweets',
        'reference': 'saif m mohammad 2012a #tagspace: semantic embeddings from hashtags in mail and books to appear in decision support systems',
    },
    {
        '_id': 'Y101',
        'paper_title': '#emotional tweets',
        'reference': 'robert plutchik 1985 on emotion: the chickenand-egg problem revisited motivation and emotion, 9(2):197200',
    },
    {
        '_id': 'Y102',
        'paper_title': '#supportthecause: identifying motivations to participate in online health campaigns',
        'reference': 'alastair iain johnston, rawi abdelal, yoshiko herrera, and rose mcdermott, editors 2009 translation using information on dialogue participants cambridge university press',
    },
    {
        '_id': 'Y102',
        'paper_title': '#supportthecause: identifying motivations to participate in online health campaigns',
        'reference': 'j richard landis and gary g koch 1977 the measurement of observer agreement for categorical data biometrics, 33(1):159174',
    },
    {
        '_id': 'Y102',
        'paper_title': '#supportthecause: identifying motivations to participate in online health campaigns',
        'reference': 'tomas mikolov, kai chen, greg corrado, and jeffrey dean 2013  #emotional tweets arxiv:13013781',
    },
]

# json_normalize already returns a DataFrame, so no extra DataFrame
# constructor is wrapped around it. The top-level ``pd.json_normalize`` is
# used because the ``pandas.io.json`` spelling is deprecated.
df1 = pd.json_normalize(d)

# Bare expression kept from the notebook: it displays the frame in a
# notebook/REPL cell and has no effect when the file runs as a script.
df1


def _is_missing(value):
    """Return True when *value* is ``None`` or a float NaN."""
    return value is None or (isinstance(value, float) and math.isnan(value))


def return_id(paper_title, reference, _id):
    """Return ``_id`` when ``paper_title`` occurs inside ``reference``.

    Args:
        paper_title: Title to look for.
        reference: Text that is searched for the title.
        _id: Value handed back when the title is found.

    Returns:
        ``_id`` if ``paper_title in reference`` holds, otherwise ``None``.
        ``None`` is also returned when either text is missing (``None`` or
        NaN). An empty ``paper_title`` is a substring of any string, so it
        matches every string ``reference``.
    """
    # pandas stores missing cells as NaN rather than None; without this guard
    # the ``in`` test below would raise TypeError on such rows.
    if _is_missing(paper_title) or _is_missing(reference):
        return None
    return _id if paper_title in reference else None

# Row-wise lookup: ``axis=1`` belongs to DataFrame.apply (so the lambda
# receives one row at a time), not to return_id.
# NOTE(review): each row's title is only tested against that same row's
# reference; matching a title against other rows' references would need a
# different approach — confirm this is the intended comparison.
df1['paper_present_in'] = df1.apply(
    lambda row: return_id(row['paper_title'], row['reference'], row['_id']),
    axis=1,
)