-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrouge_scoring.py
More file actions
75 lines (55 loc) · 2.7 KB
/
rouge_scoring.py
File metadata and controls
75 lines (55 loc) · 2.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#pip install rouge
import pandas as pd
import argparse
def scoring(bertdf, userdf):
    """Score a BERT-generated summary against a user-edited one with ROUGE-L.

    Both dataframes share the same layout: the first data row holds a
    comma-separated string of sentence indices (e.g. "0,2,5"), and the
    remaining rows hold candidate sentences, one per row, in the first
    column.  The selected sentences on each side are joined into one string
    and scored with ROUGE-L (longest-common-subsequence based statistics,
    which take sentence structure into account).

    Parameters
    ----------
    bertdf : pandas.DataFrame
        Machine (BERT) summary table.
    userdf : pandas.DataFrame
        Human-edited summary table.

    Returns
    -------
    tuple of float
        (precision, recall, f_score) — also printed as a report.  Returning
        the values is a backward-compatible addition (callers that ignored
        the old ``None`` return are unaffected).
    """
    bert_indices, bert_text = _extract_summary(bertdf)
    user_indices, user_text = _extract_summary(userdf)

    # Local import so the module can be loaded without the optional
    # third-party package (pip install rouge).
    from rouge import Rouge

    scores = Rouge().get_scores(bert_text, user_text)
    rouge_l = scores[0]["rouge-l"]
    f_score = rouge_l["f"]
    precision = rouge_l["p"]
    recall = rouge_l["r"]

    print("\nBERT: " + str(bert_indices) + "\nUSER: " + str(user_indices))
    print("\n\nROUGE scoring:\n\n" +
          "Precision is :" + "{:.2%}".format(precision) +
          "\nRecall is :" + "{:.2%}".format(recall) +
          "\nF Score is :" + "{:.2%}".format(f_score))
    print("\nPrecision: how much BERT summary exceeds human summary,\n(if less than 100% means user removed sentences)\n"
          "\nRecall: how much BERT summary explains the human summary,\n(if less than 100% means user added sentences)\n"
          "\nF Score: aggregation of BERT performance,\n(if 100% means perfect match)\n")
    return precision, recall, f_score


def _extract_summary(df):
    """Split a summary dataframe into (selected indices, joined text).

    Reads the first cell directly with ``.iloc[0, 0]`` instead of the
    original ``to_string(header=False, index=False)`` round-trip, which
    silently truncates cell values longer than pandas'
    ``display.max_colwidth`` (default 50) with "..." and would then break
    the ``int()`` conversion.
    """
    # First data row: comma-separated sentence indices, e.g. "0,2,5".
    indices = [int(tok) for tok in str(df.iloc[0, 0]).split(',')]
    # Remaining rows are the sentences; reset so positions start at 0 again.
    sentences = df.drop(0).reset_index(drop=True)
    column = sentences.columns[0]
    selected = sentences.iloc[indices, :][column].tolist()
    return indices, ' '.join(str(item) for item in selected)
if __name__ == '__main__':
    # CLI entry point: expects two CSV files exported by the summarizer
    # pipeline — machine output first, human-edited version second.
    parser = argparse.ArgumentParser(description='Rouge Scoring')
    parser.add_argument('filename_1', help='CSV file of BERT summarizer of a searched URL', nargs='?')
    parser.add_argument('filename_2', help='CSV file user changes', nargs='?')
    args = parser.parse_args()
    if args.filename_1 is not None and args.filename_2 is not None:
        # Load both CSV files into dataframes.
        bert = pd.read_csv(args.filename_1)
        user = pd.read_csv(args.filename_2)
        # scoring(machine, human) — this argument order is important.
        scoring(bert, user)
    else:
        # BUG FIX: parser.print_help() writes to stdout itself and returns
        # None; the original print(parser.print_help()) appended a spurious
        # "None" line after the usage text.
        parser.print_help()