-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrouge_scoring.py
More file actions
75 lines (55 loc) · 2.7 KB
/
rouge_scoring.py
File metadata and controls
75 lines (55 loc) · 2.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#pip install rouge
import pandas as pd
import argparse
def scoring(bertdf, userdf):
    """Score a BERT-generated summary against a user-edited one with ROUGE-L.

    Both dataframes share the same layout: the first data row holds a
    comma-separated string of sentence indices (e.g. "0,2,5"), and the
    remaining rows hold candidate sentences, one per row, in the first
    column.  The selected sentences on each side are joined into one string
    and scored with ROUGE-L (longest-common-subsequence based statistics,
    which take sentence structure into account).

    Parameters
    ----------
    bertdf : pandas.DataFrame
        Machine (BERT) summary table.
    userdf : pandas.DataFrame
        Human-edited summary table.

    Returns
    -------
    tuple of float
        (precision, recall, f_score) — also printed as a report.  Returning
        the values is a backward-compatible addition (callers that ignored
        the old ``None`` return are unaffected).
    """
    bert_indices, bert_text = _extract_summary(bertdf)
    user_indices, user_text = _extract_summary(userdf)

    # Local import so the module can be loaded without the optional
    # third-party package (pip install rouge).
    from rouge import Rouge

    scores = Rouge().get_scores(bert_text, user_text)
    rouge_l = scores[0]["rouge-l"]
    f_score = rouge_l["f"]
    precision = rouge_l["p"]
    recall = rouge_l["r"]

    print("\nBERT: " + str(bert_indices) + "\nUSER: " + str(user_indices))
    print("\n\nROUGE scoring:\n\n" +
          "Precision is :" + "{:.2%}".format(precision) +
          "\nRecall is :" + "{:.2%}".format(recall) +
          "\nF Score is :" + "{:.2%}".format(f_score))
    print("\nPrecision: how much BERT summary exceeds human summary,\n(if less than 100% means user removed sentences)\n"
          "\nRecall: how much BERT summary explains the human summary,\n(if less than 100% means user added sentences)\n"
          "\nF Score: aggregation of BERT performance,\n(if 100% means perfect match)\n")
    return precision, recall, f_score


def _extract_summary(df):
    """Split a summary dataframe into (selected indices, joined text).

    Reads the first cell directly with ``.iloc[0, 0]`` instead of the
    original ``to_string(header=False, index=False)`` round-trip, which
    silently truncates cell values longer than pandas'
    ``display.max_colwidth`` (default 50) with "..." and would then break
    the ``int()`` conversion.
    """
    # First data row: comma-separated sentence indices, e.g. "0,2,5".
    indices = [int(tok) for tok in str(df.iloc[0, 0]).split(',')]
    # Remaining rows are the sentences; reset so positions start at 0 again.
    sentences = df.drop(0).reset_index(drop=True)
    column = sentences.columns[0]
    selected = sentences.iloc[indices, :][column].tolist()
    return indices, ' '.join(str(item) for item in selected)
if __name__ == '__main__':
    # CLI entry point: expects two CSV files exported by the summarizer
    # pipeline — machine output first, human-edited version second.
    parser = argparse.ArgumentParser(description='Rouge Scoring')
    parser.add_argument('filename_1', help='CSV file of BERT summarizer of a searched URL', nargs='?')
    parser.add_argument('filename_2', help='CSV file user changes', nargs='?')
    args = parser.parse_args()
    if args.filename_1 is not None and args.filename_2 is not None:
        # Load both CSV files into dataframes.
        bert = pd.read_csv(args.filename_1)
        user = pd.read_csv(args.filename_2)
        # scoring(machine, human) — this argument order is important.
        scoring(bert, user)
    else:
        # BUG FIX: parser.print_help() writes to stdout itself and returns
        # None; the original print(parser.print_help()) appended a spurious
        # "None" line after the usage text.
        parser.print_help()