## SETUP #####################################################################################################################

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import streamlit as st
import pandas as pd
from PIL import Image
import os

# load model
model = AutoModelForSequenceClassification.from_pretrained("amandakonet/climatebert-fact-checking")
tokenizer = AutoTokenizer.from_pretrained("amandakonet/climatebert-fact-checking")

# read in example
ex_claims = ['Global warming is driving polar bears toward extinction',
 'Global warming is driving polar bears toward extinction',
 'Global warming is driving polar bears toward extinction',
 'Global warming is driving polar bears toward extinction',
 'Global warming is driving polar bears toward extinction',
 'Climate skeptics argue temperature records have been adjusted in recent years to make the past appear cooler and the present warmer, although the Carbon Brief showed that NOAA has actually made the past warmer, evening out the difference.',
 'Climate skeptics argue temperature records have been adjusted in recent years to make the past appear cooler and the present warmer, although the Carbon Brief showed that NOAA has actually made the past warmer, evening out the difference.',
 'Climate skeptics argue temperature records have been adjusted in recent years to make the past appear cooler and the present warmer, although the Carbon Brief showed that NOAA has actually made the past warmer, evening out the difference.',
 'Climate skeptics argue temperature records have been adjusted in recent years to make the past appear cooler and the present warmer, although the Carbon Brief showed that NOAA has actually made the past warmer, evening out the difference.',
 'Climate skeptics argue temperature records have been adjusted in recent years to make the past appear cooler and the present warmer, although the Carbon Brief showed that NOAA has actually made the past warmer, evening out the difference.',
 'We don\'t expect record years every year, but the ongoing long-term warming trend is clear.',
 'We don\'t expect record years every year, but the ongoing long-term warming trend is clear.',
 'We don\'t expect record years every year, but the ongoing long-term warming trend is clear.',
 'We don\'t expect record years every year, but the ongoing long-term warming trend is clear.',
 'We don\'t expect record years every year, but the ongoing long-term warming trend is clear.',
 'It is increasingly clear that the planet was significantly warmer than today several times during the past 10,000 years.',
 'It is increasingly clear that the planet was significantly warmer than today several times during the past 10,000 years.',
 'It is increasingly clear that the planet was significantly warmer than today several times during the past 10,000 years.',
 'It is increasingly clear that the planet was significantly warmer than today several times during the past 10,000 years.',
 'It is increasingly clear that the planet was significantly warmer than today several times during the past 10,000 years.',
 'Eleven percent of all global greenhouse gas emissions caused by humans are caused by deforestation, comparable to the emissions from all of the cars and trucks on the planet.',
 'Eleven percent of all global greenhouse gas emissions caused by humans are caused by deforestation, comparable to the emissions from all of the cars and trucks on the planet.',
 'Eleven percent of all global greenhouse gas emissions caused by humans are caused by deforestation, comparable to the emissions from all of the cars and trucks on the planet.',
 'Eleven percent of all global greenhouse gas emissions caused by humans are caused by deforestation, comparable to the emissions from all of the cars and trucks on the planet.',
 'Eleven percent of all global greenhouse gas emissions caused by humans are caused by deforestation, comparable to the emissions from all of the cars and trucks on the planet.']

ex_evidence = ['Recent Research Shows Human Activity Driving Earth Towards Global Extinction Event.',
 'Environmental impacts include the extinction or relocation of many species as their ecosystems change, most immediately the environments of coral reefs, mountains, and the Arctic.',
 'Rising temperatures push bees to their physiological limits, and could cause the extinction of bee populations.',
 'Rising global temperatures, caused by the greenhouse effect, contribute to habitat destruction, endangering various species, such as the polar bear.',
 '"Bear hunting caught in global warming debate".',
 'It is a major aspect of climate change, and has been demonstrated by the instrumental temperature record which shows global warming of around 1\xa0°C since the pre-industrial period, although the bulk of this (0.9°C) has occurred since 1970.',
 'Improved measurement and analysis techniques have reconciled this discrepancy: corrected buoy and satellite surface temperatures are slightly cooler and corrected satellite and radiosonde measurements of the tropical troposphere are slightly warmer.',
 'Reconstructions have consistently shown that the rise in the instrumental temperature record of the past 150 years is not matched in earlier centuries, and the name "hockey stick graph" was coined for figures showing a long-term decline followed by an abrupt rise in temperatures.',
 'It concluded, "The weight of current multi-proxy evidence, therefore, suggests greater 20th-century warmth, in comparison with temperature levels of the previous 400 years, than was shown in the TAR.',
 'In at least some areas, the recent period appears to be warmer than has been the case for a thousand or more years".',
 'Climate change is a long-term, sustained trend of change in climate.',
 'Using the long-term temperature trends for the earth scientists and statisticians conclude that it continues to warm through time.',
 'While record-breaking years attract considerable public interest, individual years are less significant than the overall trend.',
 'This long-term trend is the main cause for the record warmth of 2015 and 2016, surpassing all previous years—even ones with strong El Niño events."',
 'Between 1850 and 1950 a long-term trend of gradual climate warming is observable, and during this same period the Marsham record of oak-leafing dates tended to become earlier.',
 'During the Mesozoic, the world, including India, was considerably warmer than today.',
 'Consequently, summers are 2.3\xa0°C (4\xa0°F) warmer in the Northern Hemisphere than in the Southern Hemisphere under similar conditions.',
 'The result is a picture of relatively cool conditions in the seventeenth and early nineteenth centuries and warmth in the eleventh and early fifteenth centuries, but the warmest conditions are apparent in the twentieth century.',
 "The current scientific consensus is that: Earth's climate has warmed significantly since the late 1800s.",
 'However, the geological record demonstrates that Earth has remained at a fairly constant temperature throughout its history, and that the young Earth was somewhat warmer than it is today.',
 'Tropical deforestation is responsible for approximately 20% of world greenhouse gas emissions.',
 'Of these emissions, 65% was carbon dioxide from fossil fuel burning and industry, 11% was carbon dioxide from land use change, which is primarily due to deforestation, 16% was from methane, 6.2% was from nitrous oxide, and 2.0% was from fluorinated gases.',
 'Land-use change, such as deforestation, caused about 31% of cumulative emissions over 1870–2017, coal 32%, oil 25%, and gas 10%.',
 'The estimate of total CO 2 emissions includes biotic carbon emissions, mainly from deforestation.',
 'The vast majority of anthropogenic carbon dioxide emissions come from combustion of fossil fuels, principally coal, oil, and natural gas, with additional contributions coming from deforestation, changes in land use, soil erosion and agriculture (including livestock).']

ex_labels = ['not enough info',
 'supports',
 'not enough info',
 'supports',
 'not enough info',
 'not enough info',
 'not enough info',
 'not enough info',
 'refutes',
 'refutes',
 'not enough info',
 'supports',
 'not enough info',
 'supports',
 'supports',
 'not enough info',
 'not enough info',
 'not enough info',
 'not enough info',
 'not enough info',
 'refutes',
 'not enough info',
 'refutes',
 'not enough info',
 'not enough info']

ex_df = pd.DataFrame({'claim' : ex_claims, 'evidence' : ex_evidence, 'label': ex_labels})


## TEXT ######################################################################################################################

# title
st.title('Combatting Climate Change Misinformation with Transformers')

st.markdown("## The Gist")

st.markdown("**Problem**🤔: Climate change misinformation spreads quickly and is difficult to combat. However, its important to do so, because climate change misinformation has direct impacts on public opinion and public policy surrounding climate change.")
st.markdown("**Solution**💡: Develop a pipeline in which users can input climate change claims... and the pipeline returns whether the claim is refuted or supported by current climate science, along with the corresponding evidence.")
st.markdown("**Approach**🔑:")
st.markdown("* There are many steps to this pipeline. Here, I focus on fine-tuning a transformer model, ClimateBERT, using the textual entailment task.")
st.markdown("* Given a {claim, evidence} pair, determine whether the climate claim is supported or refuted (or neither) by the evidence")
st.markdown("* The dataset used is Climate FEVER, a natural language inference dataset with 1,535 {claim, [evidence], [label]} tuples")

st.markdown("---")

st.markdown("## The Details")

# section 1: the context, problem; how to address
st.markdown("### Problem 🤔")
st.markdown("Misinformation about climate change spreads quickly and has direct impacts on public opinion and public policy surrounding the climate. Further, misinformation is difficult to combat, and people are able to \"verify\" false climate claims on biased sites. Ideally, people would be able to easily verify climate claims. This is where transformers come in.")


# section 2: what is misinformation? how is it combatted now? how successful is this?
st.markdown("### More about Misinformation")

st.markdown("What is misinformation? How does it spread?")
st.markdown("* **Misinformation** can be defined as “false or inaccurate information, especially that which is deliberately intended to deceive.”")
st.markdown("* It can exist in different domains, and each domain has different creators and distributors of misinformation.") 
st.markdown("* Misinformation regarding climate change is often funded by conservative foundations or large energy industries such as gas, coal, and oil. (1)")

misinfo_flowchart = Image.open('images/misinfo_chart.jpeg')
st.image(misinfo_flowchart, caption='The misinformation flowchart. (1)')

st.markdown("**Why does this matter?** Through echo chambers, polarization, and feedback loops, misinformation can spread from these large organizes to the public, thus arming the public with pursausive information designed to create scepticism around and/or denial of climate change, its urgency, and climate change scientists. This is especially problematic in democratic societies, where the public, to some extent, influences governmental policy decisions (brookings). Existing research suggests that misinformation directly contributes to public support of political inaction and active stalling or rejection of pro- climate change policies (1).")

st.markdown("How is climate change misinformation combatted now? Below are a few of the ways according to the Brookings Institute:")
st.markdown("1. Asking news sources to call out misinformation")
st.markdown("2. Teaching and encouraging media literacy among the public (how to detect fake news, critical evaluation of information provided, etc.")
st.markdown("3. Governments should encourage independent journalism but avoid censoring news")
st.markdown("4. Social media platform investment in algorithmic detection of fake news")

st.markdown("However, many of the proposed solutions above require adoption of behaviors. This is difficult to acheive, particularly among news organizations and social media platforms which receive monetary benefits from misinformation in the form of ad revenue from cite usage and viewership.")

# section 3: how can transformers help?
st.markdown("### How can Transformers Help?💡")

st.markdown("**FEVER**")
st.markdown("* FEVER, or Fact Extraction and VERification, was introduced in 2018 as the first dataset containing {fact, evdience, entailment_label} information. They extracted altering sentences from Wikipedia and had annotators report the relationship between the setences: entailment, contradition, not enough information.")
st.markdown("* Since then, other researchers have expanded on this area in different domains")
st.markdown("* Here, we use Climate FEVER (3), a similar dataset developed and annotated by ")

st.markdown("**Fact Verification / Fact-Checking**")
st.markdown("* This is simply an extenstion of the textual entailment task")
st.markdown("* Given two sentences, sent1 and sent2, determine the relationship: entail, contradict, neutral")
st.markdown("* With fact verification, we can think of the sentences as claim and evidence and labels as support, refute, or not enough information to refute or support.")

# section 4: The process
# this is the pipeline in my notes (u are here highlight)
st.markdown("### The Process 🔑")

st.markdown("Imagine: A person is curious about whether a claim they heard about climate change is true. How can transformers help validate or refute the claim?")

st.markdown("1. User inputs a climate claim")

st.markdown("2. Retrieve evidence related to input claim \
	- For each claim, collect N related documents. These documents are selected by finding the N documents with the highest similarity scores to the claim. A current area of research: How do we keep the set of curated documents up-to-date? Validate their contents?")

st.markdown("3. Send (claim, evidence) pairs to a transformer model. Have the model predict whether each evidence supports, refutes, or is not relevant to the claim. (📍 YOU ARE HERE!)")

st.markdown("4. Report back to the user: The supporting evidence for the claim (if any), the refuting evidence for the claim (if any). If no relevant evidence is found, report that the claim cannot be supported or refuted by current evidence.")


# section 5: my work
st.markdown("### Climate Claim Fact-Checking with Transformers")

st.markdown("My work focuses on step 3 of the process: Training a transformer model to accurately categorize (claim, evidence) as:")
st.markdown("* evidence *supports* (entails) claim")
st.markdown("* evidence *refutes* (contradicts) claim")
st.markdown("* evidence *does not provide enough info to support or refute* (neutral) claim")

st.markdown("For this project, I fine-tune ClimateBERT (4) on the text entailment task")
st.markdown("* ClimateBERT is a domain-adapted DistilRoBERTa model")
st.markdown("* Corpus used has ~1.7M climate-related passages taken from news articles, research abstracts, and corporate climate reports")

## EXAMPLE ###################################################################################################################

st.markdown("## Try it out!")

# select climate claim
option_claim = st.selectbox('Select a climate claim to test', ex_df['claim'].unique())

# filter df to selected claim
filtered_df = ex_df[ex_df['claim'] == option_claim]

# select evidence
option_evidence = st.selectbox('Select evidence to test', filtered_df['evidence'].unique())

st.markdown("Now, we can use your selected (claim, evidence) pair in the fine-tuned transformer!")


# tokenize
features = tokenizer(option_claim, option_evidence,  
                   padding='max_length', truncation=True, return_tensors="pt", max_length=512)

# set model to eval mode and run inference on the selected (claim, evidence) pair
model.eval()
with torch.no_grad():
    scores = model(**features).logits
    label_mapping = ['supports', 'refutes', 'not enough info']
    labels = [label_mapping[score_max] for score_max in scores.argmax(dim=1)]

    st.write("**Claim:**", option_claim)
    st.write("**Evidence:**", option_evidence)

    st.write("**The predicted label is:**", labels[0])

    # look up the true label for the selected evidence
    true_label = list(filtered_df[filtered_df['evidence'] == option_evidence]['label'])[0]
    st.write("**The true label is:**", true_label)

st.write("Check out my github repository to try out custom claim and evidence pairs, linked under references.")

# section 6: analysis
st.markdown("## Critical Analysis")
st.markdown("What else could we do?")
st.markdown("* Given more data, the performance of the model can be greatly improved. This is just a proof of concept")
st.markdown("* This is only one small part of the puzzle!")
st.markdown("* In the complete pipeline (from user input to final output), we could move from just outputting evidence to training a transformer to reply with persuasive evidence. That is, instead of simply saying, \"This claim is supported by this evidence\", the model could transform the evidence into a persuasive argument, thus combatting climate change misinfo in a more platable and convincing way.")


# References + Resource Links
st.markdown("## Resource Links")

st.markdown("### References")
st.markdown("0. My [huggingface model card](https://huggingface.co/amandakonet/climatebert-fact-checking), [adopted Climate FEVER dataset card](https://huggingface.co/datasets/amandakonet/climate_fever_adopted), and [project code on github](https://github.com/amandakonet/climate-change-misinformation)")
st.markdown("1. https://www.carbonbrief.org/guest-post-how-climate-change-misinformation-spreads-online")
st.markdown("2. https://www.brookings.edu/research/how-to-combat-fake-news-and-disinformation/")
st.markdown("3. Climate FEVER [paper](https://arxiv.org/abs/2012.00614), [huggingface repo](https://huggingface.co/datasets/climate_fever), and [github](https://github.com/huggingface/datasets/tree/master/datasets/climate_fever)")
st.markdown("4. [ClimateBERT](https://climatebert.ai/), [paper](https://arxiv.org/abs/2110.12010)")