<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.2 20190208//EN" "JATS-archivearticle1-mathml3.dtd"> <article xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic" pub-type="epub">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">78717</article-id><article-id pub-id-type="doi">10.7554/eLife.78717</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Research Article</subject></subj-group><subj-group subj-group-type="heading"><subject>Neuroscience</subject></subj-group></article-categories><title-group><article-title>Robust group- but limited individual-level (longitudinal) reliability and insights into cross-phases response prediction of conditioned fear</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" id="author-160183"><name><surname>Klingelhöfer-Jens</surname><given-names>Maren</given-names></name><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-5393-7871</contrib-id><email>m.klingelhoefer-jens@uke.de</email><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="con1"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-262534"><name><surname>Ehlers</surname><given-names>Mana R</given-names></name><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-1316-3787</contrib-id><xref ref-type="aff" rid="aff1">1</xref><xref 
ref-type="fn" rid="con2"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-274678"><name><surname>Kuhn</surname><given-names>Manuel</given-names></name><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0003-2210-9130</contrib-id><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="con3"/><xref ref-type="fn" rid="conf2"/></contrib><contrib contrib-type="author" id="author-274677"><name><surname>Keyaniyan</surname><given-names>Vincent</given-names></name><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-5674-5197</contrib-id><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="con4"/><xref ref-type="fn" rid="conf1"/></contrib><contrib contrib-type="author" id="author-104245"><name><surname>Lonsdorf</surname><given-names>Tina B</given-names></name><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0003-1501-4846</contrib-id><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="other" rid="fund1"/><xref ref-type="other" rid="fund2"/><xref ref-type="other" rid="fund3"/><xref ref-type="fn" rid="con5"/><xref ref-type="fn" rid="conf1"/></contrib><aff id="aff1"><label>1</label><institution-wrap><institution-id institution-id-type="ror">https://ror.org/01zgy1s35</institution-id><institution>Institute for Systems Neuroscience, University Medical Center Hamburg-Eppendorf</institution></institution-wrap><addr-line><named-content content-type="city">Hamburg</named-content></addr-line><country>Germany</country></aff><aff id="aff2"><label>2</label><institution-wrap><institution-id institution-id-type="ror">https://ror.org/01kta7d96</institution-id><institution>Department of Psychiatry, Harvard Medical School, and Center for Depression, Anxiety and Stress Research, McLean Hospital</institution></institution-wrap><addr-line><named-content 
content-type="city">Belmont</named-content></addr-line><country>United States</country></aff></contrib-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Shackman</surname><given-names>Alexander</given-names></name><role>Reviewing Editor</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/047s2c258</institution-id><institution>University of Maryland</institution></institution-wrap><country>United States</country></aff></contrib><contrib contrib-type="senior_editor"><name><surname>Baker</surname><given-names>Chris I</given-names></name><role>Senior Editor</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/01cwqze88</institution-id><institution>National Institute of Mental Health, National Institutes of Health</institution></institution-wrap><country>United States</country></aff></contrib></contrib-group><pub-date publication-format="electronic" date-type="publication"><day>13</day><month>09</month><year>2022</year></pub-date><pub-date pub-type="collection"><year>2022</year></pub-date><volume>11</volume><elocation-id>e78717</elocation-id><history><date date-type="received" iso-8601-date="2022-03-17"><day>17</day><month>03</month><year>2022</year></date><date date-type="accepted" iso-8601-date="2022-09-12"><day>12</day><month>09</month><year>2022</year></date></history><pub-history><event><event-desc>This manuscript was published as a preprint at bioRxiv.</event-desc><date date-type="preprint" iso-8601-date="2022-03-18"><day>18</day><month>03</month><year>2022</year></date><self-uri content-type="preprint" xlink:href="https://doi.org/10.1101/2022.03.15.484434"/></event></pub-history><permissions><copyright-statement>© 2022, Klingelhöfer-Jens et al</copyright-statement><copyright-year>2022</copyright-year><copyright-holder>Klingelhöfer-Jens et al</copyright-holder><ali:free_to_read/><license 
xlink:href="http://creativecommons.org/licenses/by/4.0/"><ali:license_ref>http://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This article is distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife-78717-v3.pdf"/><self-uri content-type="figures-pdf" xlink:href="elife-78717-figures-v3.pdf"/><abstract><p>Here, we follow the call to target measurement reliability as a key prerequisite for individual-level predictions in translational neuroscience by investigating (1) longitudinal reliability at the individual and (2) group level, (3) internal consistency and (4) response predictability across experimental phases. One hundred and twenty individuals performed a fear conditioning paradigm twice 6 months apart. Analyses of skin conductance responses, fear ratings and blood oxygen level dependent functional magnetic resonance imaging (BOLD fMRI) with different data transformations and included numbers of trials were conducted. While longitudinal reliability was rather limited at the individual level, it was comparatively higher for acquisition but not extinction at the group level. Internal consistency was satisfactory. Higher responding in preceding phases predicted higher responding in subsequent experimental phases at a weak to moderate level depending on data specifications. 
In sum, the results suggest that while individual-level predictions are meaningful for (very) short time frames, they also call for more attention to measurement properties in the field.</p></abstract><kwd-group kwd-group-type="author-keywords"><kwd>temporal stability</kwd><kwd>fear conditioning</kwd><kwd>skin conductance response</kwd><kwd>fear ratings</kwd><kwd>BOLD fMRI</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd>Human</kwd></kwd-group><funding-group><award-group id="fund1"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100001659</institution-id><institution>Deutsche Forschungsgemeinschaft</institution></institution-wrap></funding-source><award-id>INST 211/633-2</award-id><principal-award-recipient><name><surname>Lonsdorf</surname><given-names>Tina B</given-names></name></principal-award-recipient></award-group><award-group id="fund2"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100001659</institution-id><institution>Deutsche Forschungsgemeinschaft</institution></institution-wrap></funding-source><award-id>LO 1980/4-1</award-id><principal-award-recipient><name><surname>Lonsdorf</surname><given-names>Tina B</given-names></name></principal-award-recipient></award-group><award-group id="fund3"><funding-source><institution-wrap><institution-id institution-id-type="FundRef">http://dx.doi.org/10.13039/501100001659</institution-id><institution>Deutsche Forschungsgemeinschaft</institution></institution-wrap></funding-source><award-id>LO 1980/7-1</award-id><principal-award-recipient><name><surname>Lonsdorf</surname><given-names>Tina B</given-names></name></principal-award-recipient></award-group><funding-statement>The funders had no role in study design, data collection, and interpretation, or the decision to submit the work for 
publication.</funding-statement></funding-group><custom-meta-group><custom-meta specific-use="meta-only"><meta-name>Author impact statement</meta-name><meta-value>Reliability and predictability analyses beyond standard measures provide empirically based guidance regarding the design of fear conditioning tasks to assess individual differences and group-level inferences cross-sectionally and longitudinally.</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The increasing incidence (e.g., <xref ref-type="bibr" rid="bib137">Xiong et al., 2022</xref>) and high relapse rates (<xref ref-type="bibr" rid="bib27">Essau et al., 2018</xref>; <xref ref-type="bibr" rid="bib138">Yonkers et al., 2003</xref>) of anxiety-related disorders call for a better understanding of anxiety- and stress-related processes which might contribute to improving existing treatments or developing more effective interventions. In the laboratory, these processes can be studied using fear conditioning paradigms (<xref ref-type="bibr" rid="bib24">Dunsmoor et al., 2022</xref>; <xref ref-type="bibr" rid="bib36">Fullana et al., 2020</xref>; <xref ref-type="bibr" rid="bib80">Milad and Quirk, 2012</xref>).</p><p>In differential fear conditioning protocols (see <xref ref-type="bibr" rid="bib65">Lonsdorf et al., 2017a</xref>) one stimulus is repetitively paired with an aversive unconditioned stimulus (US; e.g., electrotactile stimulation), and as a consequence becomes a conditioned stimulus (CS+) while another stimulus, the CS−, is never paired with the US. After this acquisition training phase, CSs are presented without the US (extinction training) leading to a gradual waning of the conditioned response. 
Critically, the fear memory (CS+/US association) is not erased, but a competing inhibitory extinction memory (CS+/no US association) is assumed to be formed during extinction training (<xref ref-type="bibr" rid="bib80">Milad and Quirk, 2012</xref>; <xref ref-type="bibr" rid="bib83">Myers and Davis, 2007</xref>). Subsequently, return of fear (RoF) can be induced by procedural manipulations such as a time delay (spontaneous recovery), a contextual change (renewal, <xref ref-type="bibr" rid="bib125">Vervliet et al., 2013a</xref>), or a (re-)presentation of an aversive event (reinstatement, <xref ref-type="bibr" rid="bib45">Haaker et al., 2014</xref>). Conditioned responding can be subsequently probed in an RoF test phase during which either the absence (i.e., extinction retention) or the return of conditioned responding (i.e., RoF) can be observed (<xref ref-type="bibr" rid="bib13">Bouton, 2004</xref>; <xref ref-type="bibr" rid="bib65">Lonsdorf et al., 2017a</xref>).</p><p>Findings from studies employing fear conditioning paradigms hold strong potential for translating neuroscientific findings into clinical applications (<xref ref-type="bibr" rid="bib2">Anderson and Insel, 2006</xref>; <xref ref-type="bibr" rid="bib16">Cooper et al., 2022a</xref>; <xref ref-type="bibr" rid="bib36">Fullana et al., 2020</xref>; <xref ref-type="bibr" rid="bib80">Milad and Quirk, 2012</xref>). More precisely, extinction learning is assumed to be the active component of exposure-based treatment (<xref ref-type="bibr" rid="bib42">Graham and Milad, 2011</xref>; <xref ref-type="bibr" rid="bib80">Milad and Quirk, 2012</xref>; <xref ref-type="bibr" rid="bib98">Rachman, 1989</xref>; <xref ref-type="bibr" rid="bib126">Vervliet et al., 2013b</xref>) and experimental RoF manipulations have been suggested to serve as a model of clinical relapse (<xref ref-type="bibr" rid="bib106">Scharfenort et al., 2016</xref>; <xref ref-type="bibr" rid="bib125">Vervliet et al., 2013a</xref>). 
Important findings in the fear conditioning field include the deficient learning of the safety signal (CS−) during acquisition training, impaired extinction learning (<xref ref-type="bibr" rid="bib23">Duits et al., 2015</xref>) and the tendency of fear generalization to innocuous stimuli (<xref ref-type="bibr" rid="bib16">Cooper et al., 2022a</xref>) in patients suffering from anxiety-related disorders as compared to healthy controls.</p><p>To date, both clinical and experimental research using the fear conditioning paradigm have primarily focused on group-level, basic, general mechanisms such as the effect of experimental manipulations – which is important to investigate (<xref ref-type="bibr" rid="bib66">Lonsdorf and Merz, 2017b</xref>). Successful clinical translation (e.g., ‘Why do some individuals develop pathological anxiety while others do not?’) and particularly treatment outcome prediction (e.g., ‘Why do some patients benefit from treatment while others relapse?’), however, requires that both the experimental paradigm and the measures employed allow for individual-level predictions over and above prediction of group averages (<xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref>; <xref ref-type="bibr" rid="bib49">Hedge et al., 2018</xref>; <xref ref-type="bibr" rid="bib66">Lonsdorf and Merz, 2017b</xref>). A prerequisite for this is that the measures show stability within and reliable differences between individuals over time. Hence, tackling clinical questions regarding individual-level predictions of symptom development or treatment outcome requires a shift toward and a validation of research methods tailored to individual differences – such as a focus on measurement reliability (<xref ref-type="bibr" rid="bib146">Zuo et al., 2019</xref>). This is a necessary prerequisite for the long-term goal of developing individualized intervention and prevention programs. 
This further relates to the pronounced heterogeneity in symptom manifestation among individuals diagnosed with the same disorders (e.g., post-traumatic stress disorder, PTSD, <xref ref-type="bibr" rid="bib38">Galatzer-Levy and Bryant, 2013b</xref>) which cannot be captured in binary clinical diagnoses as two patients with, for example, a PTSD diagnosis may not share a single symptom (<xref ref-type="bibr" rid="bib38">Galatzer-Levy and Bryant, 2013b</xref>).</p><p>Measurement reliability has only recently gained momentum in experimental cognitive research (<xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref>; <xref ref-type="bibr" rid="bib49">Hedge et al., 2018</xref>; <xref ref-type="bibr" rid="bib146">Zuo et al., 2019</xref>) and can be assessed through test–retest and longitudinal reliability (i.e., test–retest reliability over longer time intervals, typically assessed through, e.g., intraclass correlation coefficients, ICCs, see <xref ref-type="table" rid="table1">Table 1</xref>). 
Importantly, longitudinal reliability (for definitions and terminology, see <xref ref-type="table" rid="table1">Table 1</xref>) also has implications for the precision with which associations of one variable (e.g., conditioned responding) with another (individual difference) variable can be measured because the correlation between those two variables cannot exceed the correlations within, that is, the reliability, of these two variables (<xref ref-type="bibr" rid="bib115">Spearman, 1910</xref>).</p><table-wrap id="table1" position="float"><label>Table 1.</label><caption><title>Definitions of key terms (A) and data specifications applied across analyses (B).</title></caption><table frame="hsides" rules="groups"><thead><tr><th align="left" valign="top" colspan="3">(A)</th><th align="left" valign="bottom" colspan="5"/></tr><tr><th align="left" valign="bottom" colspan="3">Term</th><th align="left" valign="bottom" colspan="5">Definition</th></tr></thead><tbody><tr><td align="left" valign="middle" colspan="3"><bold>Internal consistency</bold></td><td align="left" valign="bottom" colspan="5">In our study, internal consistency refers to the reliability of <bold>conditioned responding within experimental phases</bold> at both time points, respectively. It provides information on the extent to which items – or in our case trials – measure the same construct (e.g., fear acquisition). Odd and even trials were split (i.e., split-half method), averaged per subject and correlated across the sample.</td></tr><tr><td align="left" valign="middle" colspan="3"><bold>Longitudinal reliability at the individual level</bold></td><td align="left" valign="bottom" colspan="5">Longitudinal reliability at the individual level indicates to which extent <bold>responses within the same individuals are stable over time</bold>. It takes into account the individual responses of participants, which are then related across time points. 
Longitudinal reliability at the individual level inherently includes the group level, as it is calculated for the sample as a whole, but the individual responses are central to the calculation.</td></tr><tr><td align="left" valign="middle" colspan="3"><list list-type="bullet"><list-item><p><italic><bold>Intraclass correlation coefficients (ICCs)</bold></italic></p></list-item></list></td><td align="left" valign="bottom" colspan="5">‘ICC coefficients quantify the extent to which multiple measurements for each individual (within individuals) are statistically similar enough to discriminate between individuals’ (<xref ref-type="bibr" rid="bib1">Aldridge et al., 2017</xref>). Here, we calculated two types of ICCs, namely <bold>absolute agreement</bold> and <bold>consistency</bold>. To illustrate the difference between absolute agreement and consistency in a short example (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>), consider an interrater reliability study with two raters: Consistency indicates the extent to which the score of one rater (<italic>y</italic>) is equal to the score of another rater (<italic>x</italic>) plus a systematic error (<italic>c</italic>) (i.e., <italic>y</italic> = <italic>x</italic> + <italic>c</italic>). In contrast, absolute agreement indicates to which degree <italic>y</italic> equals <italic>x</italic>. 
As ‘two raters’ can be replaced by ‘two time points’ and individual responses were taken into account here, we used ICCs to determine longitudinal reliability at the individual level.</td></tr><tr><td align="left" valign="middle" colspan="3"><list list-type="bullet"><list-item><p><italic><bold>Within- and between-subject similarity</bold></italic></p></list-item></list></td><td align="left" valign="bottom" colspan="5">Similarity analyses provide information on the extent to which trial-by-trial responses of one individual at one time point are comparable to responses of<break/><list list-type="bullet"><list-item><p>the same individual at a later time point (i.e., within-subject similarity) and</p></list-item><list-item><p>all other individuals at a later time point (i.e., between-subject similarity).</p></list-item></list>Comparisons of within- and between-subject similarity were used here to determine longitudinal reliability at the individual level.</td></tr><tr><td align="left" valign="middle" colspan="3"><list list-type="bullet"><list-item><p><italic><bold>Overlap at the individual level (applied for BOLD fMRI only)</bold></italic></p></list-item></list></td><td align="left" valign="bottom" colspan="5">Overlap at the individual level reflects the <bold>degree of overlap of significant voxels</bold> between both time points <bold>for single subject-level activation patterns</bold>.</td></tr><tr><td align="left" valign="middle" colspan="3"><bold>Longitudinal reliability at the group level</bold></td><td align="left" valign="bottom" colspan="5">Longitudinal reliability at the group level indicates to which degree <bold>responses within the group as a whole are stable over time</bold>. More precisely, longitudinal reliability at the group level relies on first averaging all individuals’ responses for each trial (for SCR) or voxel (for fMRI), yielding a group average for each trial/voxel. 
These are then related across time points, that is the calculation is carried out using the trial-wise (for SCR) or voxel-wise (for fMRI) group averages.</td></tr><tr><td align="left" valign="middle" colspan="3"><list list-type="bullet"><list-item><p><italic><bold>Overlap at the group level (applied for BOLD fMRI only)</bold></italic></p></list-item></list></td><td align="left" valign="bottom" colspan="5">Overlap at the group level reflects the <bold>degree of overlap of significant voxels</bold> between both time points <bold>for aggregated group-level activations</bold>.</td></tr><tr><th align="left" valign="top" colspan="8">(B)</th></tr><tr><th align="left" valign="bottom"/><th align="center" valign="bottom">Measure</th><th align="center" valign="bottom">Internal consistency</th><th align="center" valign="bottom" colspan="3">Longitudinal reliability at the individual level</th><th align="center" valign="bottom">Longitudinal reliability at the group level</th><th align="center" valign="bottom">Cross-phases predictability</th></tr><tr><th align="left" valign="bottom"/><th align="left" valign="bottom"/><th align="left" valign="bottom"/><th align="center" valign="bottom"><italic>ICCs</italic></th><th align="center" valign="bottom"><italic>Within- and between-subject similarity</italic></th><th align="center" valign="bottom"><italic>Overlap</italic></th><th align="center" valign="bottom"><italic>Overlap (BOLD fMRI) or R squared (SCR)</italic></th><th align="left" valign="bottom"/></tr><tr><td align="left" valign="bottom"><bold>Included time points</bold></td><td align="left" valign="bottom">All</td><td align="left" valign="bottom">T0 and T1 separately</td><td align="left" valign="bottom">T0 and T1</td><td align="left" valign="bottom">T0 and T1</td><td align="left" valign="bottom">T0 and T1</td><td align="left" valign="bottom">T0 and T1</td><td align="left" valign="bottom">T0</td></tr><tr><td align="left" valign="bottom" rowspan="3"><bold>Included 
stimuli</bold></td><td align="left" valign="bottom">SCR</td><td align="left" valign="bottom">CS+, CS−, CS discrimination, US</td><td align="left" valign="bottom">CS+, CS−, CS discrimination, US<sup><xref ref-type="table-fn" rid="table1fn2">*</xref></sup></td><td align="left" valign="bottom">CS+, CS−, CS discrimination, US</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">CS+, CS−, CS discrimination, US</td><td align="left" valign="bottom">CS+, CS−, CS discrimination</td></tr><tr><td align="left" valign="bottom">Fear ratings</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">CS+, CS−, CS discrimination, US<sup><xref ref-type="table-fn" rid="table1fn2">*</xref></sup></td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">CS+, CS−, CS discrimination</td></tr><tr><td align="left" valign="bottom">BOLD fMRI</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">CS discrimination<sup><xref ref-type="table-fn" rid="table1fn3">†</xref></sup></td><td align="left" valign="bottom">CS discrimination<xref ref-type="table-fn" rid="table1fn3"><sup>†</sup></xref></td><td align="left" valign="bottom">CS discrimination<xref ref-type="table-fn" rid="table1fn3"><sup>†</sup></xref></td><td align="left" valign="bottom">CS discrimination<xref ref-type="table-fn" rid="table1fn3"><sup>†</sup></xref></td><td align="left" valign="bottom">CS+, CS−, CS discrimination</td></tr><tr><td align="left" valign="bottom" rowspan="3"><bold>Phase operationalizations</bold></td><td align="left" valign="bottom">SCR</td><td align="left" valign="bottom">Entire phases (ACQ, EXT, RI-Test; except first trials of ACQ and EXT)</td><td align="left" valign="bottom">CS+, CS−, and CS discrimination: average ACQ, last two trials ACQ<xref ref-type="table-fn" rid="table1fn4"><sup>‡</sup></xref>,<break/>first trial EXT<xref ref-type="table-fn" 
rid="table1fn5"><sup>§</sup></xref>, average EXT, last two trials EXT<xref ref-type="table-fn" rid="table1fn4"><sup>‡</sup></xref><sup><xref ref-type="table-fn" rid="table1fn6">¶</xref></sup>, first trial RI-Test<xref ref-type="table-fn" rid="table1fn5"><sup>§</sup></xref><break/>US: average RI</td><td align="left" valign="bottom">Average ACQ<xref ref-type="table-fn" rid="table1fn7">**</xref>, average EXT</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">Average ACQ, average EXT</td><td align="left" valign="bottom">Average ACQ, last two trials ACQ<xref ref-type="table-fn" rid="table1fn4"><sup>‡</sup></xref>, first trial EXT<xref ref-type="table-fn" rid="table1fn5"><sup>§</sup></xref>, average EXT, last two trials EXT<xref ref-type="table-fn" rid="table1fn4"><sup>‡</sup></xref> <xref ref-type="table-fn" rid="table1fn6"><sup>¶</sup></xref>, first trial RI-Test<xref ref-type="table-fn" rid="table1fn5"><sup>§</sup></xref></td></tr><tr><td align="left" valign="bottom">Fear ratings</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">CS+, CS−, and CS discrimination: post–pre ACQ, post ACQ, pre EXT, pre–post EXT, post EXT, first trial RI-Test<break/>US: post RI-Test</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">post–pre ACQ, post ACQ, pre EXT, pre–post EXT, post EXT, first trial RI-Test</td></tr><tr><td align="left" valign="bottom">BOLD fMRI<sup><xref ref-type="table-fn" rid="table1fn8">††</xref></sup></td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">Average ACQ, average EXT</td><td align="left" valign="bottom">Average ACQ, average EXT</td><td align="left" valign="bottom">Average ACQ, average EXT</td><td align="left" valign="bottom">Average ACQ, average EXT</td><td align="left" valign="bottom">Average ACQ, average EXT</td></tr><tr><td align="left" valign="bottom" 
rowspan="3"><bold>Transformations</bold> <sup><xref ref-type="table-fn" rid="table1fn9">‡ ‡</xref></sup></td><td align="left" valign="bottom">SCR</td><td align="left" valign="bottom">None,<break/>log-transformation<xref ref-type="table-fn" rid="table1fn10"><sup>§ §</sup></xref>,<break/>log-transformation and range correction<xref ref-type="table-fn" rid="table1fn11"><sup>¶ ¶</sup></xref></td><td align="left" valign="bottom">None,<break/>log-transformation<xref ref-type="table-fn" rid="table1fn10"><sup>§ §</sup></xref>,<break/>log-transformation and range correction<xref ref-type="table-fn" rid="table1fn11"><sup>¶ ¶</sup></xref></td><td align="left" valign="bottom">None<xref ref-type="table-fn" rid="table1fn12">***</xref></td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">None,<break/>log-transformation<xref ref-type="table-fn" rid="table1fn10"><sup>§ §</sup></xref>,<break/>log-transformation and range correction<xref ref-type="table-fn" rid="table1fn11"><sup>¶ ¶</sup></xref></td><td align="left" valign="bottom">None,<break/>log-transformation<xref ref-type="table-fn" rid="table1fn10"><sup>§ §</sup></xref>,<break/>log-transformation and range correction<xref ref-type="table-fn" rid="table1fn11"><sup>¶ ¶</sup></xref></td></tr><tr><td align="left" valign="bottom">Fear ratings</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">None</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">None</td></tr><tr><td align="left" valign="bottom">BOLD fMRI</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">None</td><td align="left" valign="bottom">None</td><td align="left" valign="bottom">None</td><td align="left" valign="bottom">None</td><td align="left" valign="bottom">None</td></tr><tr><td align="left" valign="bottom" rowspan="3"><bold>Ordinal ranking</bold><xref ref-type="table-fn" 
rid="table1fn13"><sup><bold>†††</bold></sup></xref></td><td align="left" valign="bottom">SCR</td><td align="left" valign="bottom">No ranking</td><td align="left" valign="bottom">No ranking<xref ref-type="table-fn" rid="table1fn14"><sup>‡ ‡ ‡</sup></xref></td><td align="left" valign="bottom">No ranking</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">No ranking</td><td align="left" valign="bottom">No ranking and ordinal ranking <xref ref-type="table-fn" rid="table1fn15"><sup>§ § §</sup></xref></td></tr><tr><td align="left" valign="bottom">Fear ratings</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">No ranking<xref ref-type="table-fn" rid="table1fn14"><sup>‡ ‡ ‡</sup></xref></td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">No ranking and ordinal ranking</td></tr><tr><td align="left" valign="bottom">BOLD fMRI</td><td align="left" valign="bottom">–</td><td align="left" valign="bottom">No ranking</td><td align="left" valign="bottom">No ranking</td><td align="left" valign="bottom">No ranking</td><td align="left" valign="bottom">No ranking</td><td align="left" valign="bottom">No ranking</td></tr></tbody></table><table-wrap-foot><fn><p>The specifications we used here are exemplary and are not intended to cover all possible data specifications. Note that internal consistency, within- and between-subject similarity and reliability at the group level could not be calculated for fear ratings due to the limited number of trials. 
ACQ = acquisition training, EXT = extinction training, RI = reinstatement, RI-Test = reinstatement-test.</p></fn><fn id="table1fn2"><label>*</label><p>Non-pre-registered ICCs for SCRs to the USs and US aversiveness ratings were calculated as we considered these informative.</p></fn><fn id="table1fn3"><label>†</label><p>For BOLD fMRI, ICCs were calculated only for CS discrimination and not for CS+ and CS− given the fact that the calculations are based on first-level T contrast maps and contrasts against baseline are not optimal.</p></fn><fn id="table1fn4"><label>‡</label><p>In addition to the averaged acquisition and extinction training performance, the last two SCR trials of acquisition (pre-registered) and extinction training (not pre-registered) were separated from the previous trials and averaged as equivalent to the post-acquisition/-extinction ratings. The first extinction trial was taken into account separately as fear recall.</p></fn><fn id="table1fn5"><label>§</label><p>Fear recall and reinstatement-test were operationalized as the first extinction training trial and the first reinstatement-test trial (since the reinstatement effect fades away relatively quickly, <xref ref-type="bibr" rid="bib45">Haaker et al., 2014</xref>), respectively.</p></fn><fn id="table1fn6"><label>¶</label><p>The operationalization of extinction training as the last two trials was not pre-registered and included for completeness. 
In cases where phase operationalizations included more than one SCR trial, trials were averaged.</p></fn><fn id="table1fn7"><label>**</label><p>Note that reliability at a group level for SCRs during reinstatement-test was not calculated as correlations between two SCR data points are not meaningful.</p></fn><fn id="table1fn8"><label>††</label><p>fMRI data for the reinstatement-test were not analyzed in the current study since data from a single trial do not provide sufficient power.</p></fn><fn id="table1fn9"><label>‡ ‡</label><p>The pre-registered transformation types were identified to be typically employed data transformations in the literature by for example <xref ref-type="bibr" rid="bib112">Sjouwerman et al., 2022</xref> who also pre-registered these transformation types.</p></fn><fn id="table1fn10"><label>§ §</label><p>Raw SCR amplitudes were log-transformed by taking the natural logarithm to normalize the distribution (<xref ref-type="bibr" rid="bib63">Levine and Dunlap, 1982</xref>).</p></fn><fn id="table1fn11"><label>¶ ¶</label><p>Log-transformed SCR amplitudes were range corrected by dividing each individual SCR trial by the maximum SCR trial across all CS and US trials. Due to potentially different response ranges, the maximum SCR trial was determined separately for experimental days as recommended by <xref ref-type="bibr" rid="bib65">Lonsdorf et al., 2017a</xref>. Range correction was recommended to control for interindividual variability (<xref ref-type="bibr" rid="bib71">Lykken, 1972</xref>; <xref ref-type="bibr" rid="bib70">Lykken and Venables, 1971</xref>).</p></fn><fn id="table1fn12"><label>***</label><p>We also carried out similarity analyses for log-transformed as well as for log-transformed and range corrected data. However, results were almost identical to the results from the raw data. 
For reasons of space, we only report results for raw data.</p></fn><fn id="table1fn13"><label>†††</label><p>Ranking of the data was included to investigate to which degree individuals occupy the same ranks at both time points as pre-registered or put differently, whether the quality of predictions changes when the predictions were not based on the absolute values but on a coarser scale.</p></fn><fn id="table1fn14"><label>‡ ‡ ‡</label><p>As opposed to what was pre-registered, in ICC analyses, we included non-ranked data only as closer inspection of the conceptualization of ICC<sub>con</sub> revealed that it would be redundant to calculate both ICC<sub>abs</sub> and ICC<sub>con</sub> with ranked and non-ranked data as ICC<sub>con</sub> itself ranks the data.</p></fn><fn id="table1fn15"><label>§ § §</label><p>Ranks of SCRs were built upon raw, log-transformed as well as log-transformed and range corrected values.</p></fn></table-wrap-foot></table-wrap><p>Yet, in fear conditioning research, surprisingly little is known about longitudinal reliability at the individual level with time intervals ranging from 9 days to 8 months in prior work (<xref ref-type="supplementary-material" rid="supp1">Supplementary file 1</xref>; <xref ref-type="bibr" rid="bib17">Cooper et al., 2022b</xref>; <xref ref-type="bibr" rid="bib33">Fredrikson et al., 1993</xref>; <xref ref-type="bibr" rid="bib103">Ridderbusch et al., 2021</xref>; <xref ref-type="bibr" rid="bib122">Torrents-Rodas et al., 2014</xref>; <xref ref-type="bibr" rid="bib139">Zeidan et al., 2012</xref>). 
Generally (details in <xref ref-type="supplementary-material" rid="supp1">Supplementary file 1</xref>), individual-level longitudinal reliability of risk ratings, skin conductance responses (SCRs), and fear potentiated startle (FPS) was within the same range (<xref ref-type="bibr" rid="bib17">Cooper et al., 2022b</xref>; <xref ref-type="bibr" rid="bib122">Torrents-Rodas et al., 2014</xref>) whereas it was numerically somewhat lower for the BOLD response as compared to different rating types (<xref ref-type="bibr" rid="bib103">Ridderbusch et al., 2021</xref>). Longitudinal reliability at the individual level appeared higher for acquisition training than for extinction training (SCRs: <xref ref-type="bibr" rid="bib33">Fredrikson et al., 1993</xref>; <xref ref-type="bibr" rid="bib139">Zeidan et al., 2012</xref>), but comparable to generalization (<xref ref-type="bibr" rid="bib17">Cooper et al., 2022b</xref>; <xref ref-type="bibr" rid="bib122">Torrents-Rodas et al., 2014</xref>). Moreover, it appeared higher for extinction training than for reinstatement-test (for BOLD fMRI but not ratings: <xref ref-type="bibr" rid="bib103">Ridderbusch et al., 2021</xref>) and higher for CS+ than CS− responses (SCRs: <xref ref-type="bibr" rid="bib33">Fredrikson et al., 1993</xref>) and CS discrimination (ratings and BOLD fMRI: <xref ref-type="bibr" rid="bib103">Ridderbusch et al., 2021</xref>; SCRs: <xref ref-type="bibr" rid="bib139">Zeidan et al., 2012</xref>).</p><p>However, it is difficult to extract a comprehensive picture from these five studies as they differ substantially in sample size (<italic>N</italic> = 18–100), paradigm specifications, experimental phases reported, outcome measures, time intervals, and employed reliability measures (see <xref ref-type="supplementary-material" rid="supp1">Supplementary file 1</xref>).</p><p>Given the predominance of research on group-level generic mechanisms in fear conditioning research, it is even more surprising that, to our 
knowledge, no study to date has investigated longitudinal reliability at the group level and only few studies have (<xref ref-type="bibr" rid="bib33">Fredrikson et al., 1993</xref>) targeted internal consistency (i.e., the degree to which all test items capture the same construct, see <xref ref-type="table" rid="table1">Table 1</xref>). More precisely, longitudinal reliability at the group level indicates the extent to which responses averaged across the group as a whole are stable over time, which is important to establish when investigating basic, generic principles such as the impact of experimental manipulations. Even though it has to be acknowledged that the group average is not necessarily representative of any individual in the group and the same group average may arise from different and even opposite individual responses at both time points in the same group, group-level reliability is important to establish in addition to individual-level reliability. Group-level reliability is relevant not only to work focusing on the understanding of general, generic processes but also for questions about differences between two groups of individuals such as patients vs. controls (e.g., see meta-analyses of <xref ref-type="bibr" rid="bib16">Cooper et al., 2022a</xref>; <xref ref-type="bibr" rid="bib23">Duits et al., 2015</xref>). Of note, many fear conditioning paradigms were initially developed to study general group-level processes and to elicit robust group effects (<xref ref-type="bibr" rid="bib66">Lonsdorf and Merz, 2017b</xref>). 
Hence, it is important to investigate both group- and individual-level reliability given the challenges of attempts to employ cognitive tasks that were originally designed to produce robust group effects in individual difference research (<xref ref-type="bibr" rid="bib26">Elliott et al., 2020</xref>; <xref ref-type="bibr" rid="bib49">Hedge et al., 2018</xref>; <xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>; <xref ref-type="bibr" rid="bib89">Parsons et al., 2019</xref>).</p><p>As pointed out above, individual-level reliability is a prerequisite for individual-level predictions such as treatment outcomes. Since the different experimental phases of fear conditioning paradigms serve as experimental models for the development, treatment, and relapse of anxiety- and stress-related disorders, it is also an important question whether responding across phases can be reliably predicted at the individual level. Interestingly, it is often implicitly assumed that responding in one experimental phase reliably predicts responding in a subsequent phase (e.g., see <xref ref-type="bibr" rid="bib79">Milad et al., 2009</xref>; critically discussed in <xref ref-type="bibr" rid="bib67">Lonsdorf et al., 2019a</xref>) even though empirical evidence is lacking. As a result, it has been suggested to routinely ‘correct for responding’ during fear acquisition training when studying performance in later experimental phases such as extinction training or retention/RoF test (critically discussed in <xref ref-type="bibr" rid="bib67">Lonsdorf et al., 2019a</xref>). However, empirical evidence on this cross-phases predictability (for definition and terminology, see <xref ref-type="table" rid="table1">Table 1</xref>) is scarce to date.</p><p>Evidence from experimental work on cross-phase predictability in rodents and humans is mixed. 
In rodents, freezing during acquisition training and 24-hrs-delayed extinction training were uncorrelated (<xref ref-type="bibr" rid="bib93">Plendl and Wotjak, 2010</xref>) and responding during extinction training did not predict extinction retention (i.e., lever-pressing suppression: <xref ref-type="bibr" rid="bib14">Bouton et al., 2006</xref>; or freezing behavior: <xref ref-type="bibr" rid="bib110">Shumake et al., 2014</xref>). Similarly, in humans, extinction performance (FPS, SCRs, and US expectancy ratings) did not predict performance at 24-hrs-retention test (<xref ref-type="bibr" rid="bib96">Prenoveau et al., 2013</xref>). Yet, a computational modeling approach suggests that the mechanism of extinction learning (i.e., the formation of a new extinction memory trace in comparison to an update of the original fear memory trace) predicts the extent of spontaneous recovery in SCRs (<xref ref-type="bibr" rid="bib39">Gershman and Hartley, 2015</xref>).</p><p>Evidence from work in patient samples is also mixed (for a review, see <xref ref-type="bibr" rid="bib18">Craske et al., 2008</xref>). The extent of fear reduction within therapeutic sessions was unrelated to overall treatment outcome in some studies (<xref ref-type="bibr" rid="bib58">Kozak et al., 1988</xref>; <xref ref-type="bibr" rid="bib92">Pitman et al., 1996</xref>; <xref ref-type="bibr" rid="bib104">Riley et al., 1995</xref>), while others observed an association (<xref ref-type="bibr" rid="bib30">Foa et al., 1983</xref>). Similarly, significant correlations of fear reduction between therapeutic sessions with treatment outcome were observed for reported distress (<xref ref-type="bibr" rid="bib99">Rauch et al., 2004</xref>) and heart rate, but not for SCR (<xref ref-type="bibr" rid="bib58">Kozak et al., 1988</xref>; <xref ref-type="bibr" rid="bib62">Lang et al., 1970</xref>) and for self-reported fear post treatment, but not at follow-up (<xref ref-type="bibr" rid="bib30">Foa et al., 1983</xref>). 
In addition, evidence that responding in different phases is related comes from pharmacological manipulations with the cognitive enhancer <sc>D</sc>-cycloserine which facilitates learning and/or consolidation. <sc>D</sc>-cycloserine promoted long-term extinction retention (<xref ref-type="bibr" rid="bib105">Rothbaum et al., 2014</xref>; <xref ref-type="bibr" rid="bib113">Smits et al., 2013a</xref>; <xref ref-type="bibr" rid="bib114">Smits et al., 2013b</xref>) only if within-session learning was achieved.</p><p>With this pre-registered study, we follow the call for a stronger appreciation and more systematic investigations of measurement reliability (<xref ref-type="bibr" rid="bib146">Zuo et al., 2019</xref>). We address longitudinal reliability and internal consistency as well as predictability of cross-phase responding in SCRs, fear ratings, and the BOLD response. For this purpose, we reanalyzed data from 120 participants who underwent a differential fear conditioning paradigm twice (at time points T0 and T1, 6 months apart) – with habituation and acquisition training on day 1 and extinction, reinstatement and reinstatement-test on day 2 to allow for fear memory consolidation prior to extinction. Part of the data have been used previously in method-focused work (<xref ref-type="bibr" rid="bib61">Kuhn et al., 2022</xref>; <xref ref-type="bibr" rid="bib69">Lonsdorf et al., 2022</xref>; <xref ref-type="bibr" rid="bib67">Lonsdorf et al., 2019a</xref>; <xref ref-type="bibr" rid="bib112">Sjouwerman et al., 2022</xref>) and work investigating the association of conditioned responding with brain morphological measures (<xref ref-type="bibr" rid="bib25">Ehlers et al., 2020</xref>).</p><p>Specifically, we (1) estimated internal consistency of SCRs at both time points and (2) systematically assessed longitudinal reliability of SCRs, fear ratings and BOLD fMRI at the individual level by calculating ICCs. 
This was complemented by investigations of response similarity (SCR and BOLD fMRI) and the degree of overlap of activated voxels at both time points (BOLD fMRI) as additional measurements of longitudinal reliability at the individual level that allow for a more detailed picture than the coarser ICCs (see <xref ref-type="table" rid="table1">Table 1</xref> for terminology and definitions). We also (3) assessed whether SCR and BOLD fMRI show longitudinal reliability at the group level. Finally, we (4) investigated if individual-level responding during an experimental phase is predictive of individual-level responding during subsequent experimental phases. All hypotheses are tested across different pre-registered data specifications to account for procedural heterogeneity in the literature (see <xref ref-type="supplementary-material" rid="supp1">Supplementary file 1</xref>): More precisely, we follow a pre-registered multiverse-inspired approach and include (1) responses to the CS+, CS−, US, and CS discrimination, (2) different phase operationalizations, (3) different data transformations (none, log-transformed, log-transformed and range-corrected), and (4) ordinally ranked vs. non-ranked data (for justification of these choices, see <xref ref-type="table" rid="table1">Table 1</xref>). 
We acknowledge that the specifications used here are not intended to cover all potentially meaningful combinations as in a full multiverse study (<xref ref-type="bibr" rid="bib69">Lonsdorf et al., 2022</xref>; <xref ref-type="bibr" rid="bib112">Sjouwerman et al., 2022</xref>; <xref ref-type="bibr" rid="bib118">Steegen et al., 2016</xref>) but can be viewed as a manyverse (<xref ref-type="bibr" rid="bib61">Kuhn et al., 2022</xref>) in which we a priori pre-registered a number of meaningful combinations.</p></sec><sec id="s2" sec-type="results"><title>Results</title><p>For a comprehensive overview of the different reliability measures used here and of the analyses conducted, see <xref ref-type="table" rid="table1">Table 1</xref>.</p><sec id="s2-1"><title>Satisfactory internal consistency</title><p>To assess internal consistency of SCRs, trials were split into odd and even trials (i.e., odd–even approach), averaged for each individual subject and then correlated (Pearson’s correlation coefficient). This was done separately for each time point and experimental phase. Internal consistency at T0 (see <xref ref-type="fig" rid="fig1">Figure 1A</xref>) and T1 (see <xref ref-type="fig" rid="fig1">Figure 1B</xref>) of raw SCRs to the CS+ and CS− ranged from 0.54 to 0.85 and for raw SCRs to the US from 0.91 to 0.94 for all phases. In comparison, internal consistency was lower for CS discrimination with values ranging from −0.01 to 0.60. 
Log-transformation did not impact internal consistency but log-transformation in combination with range correction largely resulted in reduced reliability (see <xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1</xref>).</p><fig-group><fig id="fig1" position="float"><label>Figure 1.</label><caption><title>Illustration of internal consistency for skin conductance responses (SCRs) at T0 (<bold>A</bold>) and T1 (<bold>B</bold>) as well as ICC<sub>abs</sub> and ICC<sub>con</sub> for SCRs (<bold>C, D</bold>) and fear ratings (<bold>E, F</bold>) color coded for stimulus type.</title><p>Internal consistency indicates the reliability of responses within each time point, while intraclass correlation coefficients (ICCs) indicate the reliability across both time points. Note that assessment of internal consistency was not possible for fear ratings as only two ratings (pre, post) were available. Error bars represent 95% confidence intervals and indicate significance, when zero is not included in the interval. The <italic>y</italic>-axis comprises the different phases or phase operationalizations. In the literature, internal consistency is often interpreted using benchmarks (<xref ref-type="bibr" rid="bib55">Kline, 2013</xref>) for unacceptable (<0.5), poor (>0.5 but <0.6), questionable (>0.6 but <0.7), acceptable (>0.7 but <0.8), good (>0.8 but <0.9), and excellent (≥0.9). Common benchmarks in the literature for ICCs are poor (<0.5), moderate (>0.5 but <0.75), good (>0.75 but <0.9), and excellent (≥0.9) (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>). These benchmarks are included here to provide a frame of reference but we point out that these benchmarks are arbitrary and most importantly derived from psychometric work on trait self-report measures and should hence not be overinterpreted in the context of responding in experimental paradigms which bear more sources of noise (<xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>). 
ACQ = acquisition training, EXT = extinction training, RI = reinstatement, RI-Test = reinstatement-test, pre = prior to the experimental phase, post = subsequent to the experimental phase.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1.jpg"/></fig><fig id="fig1s1" position="float" specific-use="child-fig"><label>Figure 1—figure supplement 1.</label><caption><title>Illustration of (<bold>A, B</bold>) internal consistency for log-transformed (log) as well as (<bold>C, D</bold>) log-transformed and range corrected (log rc) skin conductance responses (SCRs) at T0 and T1 color coded for stimulus type.</title><p>Error bars represent 95% confidence intervals and indicate significance, when zero is not included in the interval. The <italic>y</italic>-axis comprises the different experimental phases. Internal consistency is interpreted using benchmarks (<xref ref-type="bibr" rid="bib55">Kline, 2013</xref>) for unacceptable (<0.5), poor (>0.5 but <0.6), questionable (>0.6 but <0.7), acceptable (>0.7 but <0.8), good (>0.8 but <0.9), and excellent (≥0.9). ACQ = acquisition training, EXT = extinction training, RI-Test = reinstatement-test.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1-figsupp1.jpg"/></fig><fig id="fig1s2" position="float" specific-use="child-fig"><label>Figure 1—figure supplement 2.</label><caption><title>Illustration of (<bold>A, B</bold>) intraclass correlation coefficients (ICCs) of log-transformed (log) as well as (<bold>C, D</bold>) log-transformed and range corrected (log, rc) skin conductance responses (SCRs) color coded for stimulus type.</title><p>The <italic>y</italic>-axis comprises the different phase operationalizations. A and C display ICC<sub>abs</sub>, B and D display ICC<sub>con</sub>. 
ICCs <0.5, <0.75, <0.9, and >0.9 were interpreted as poor, moderate, good, and excellent, respectively (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>). Error bars represent 95% confidence intervals and indicate significance of ICCs, when zero is not included in the interval. ACQ = acquisition training, EXT = extinction training, RI = reinstatement, RI-Test = reinstatement-test.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1-figsupp2.jpg"/></fig><fig id="fig1s3" position="float" specific-use="child-fig"><label>Figure 1—figure supplement 3.</label><caption><title>Illustration of ICC<sub>abs</sub> of trial-by-trial raw skin conductance responses (SCRs) for phases (A–D: Acquisition, E–G: Extinction, H–J: Reinstatement-Test, K: Reinstatement) and stimulus types separately.</title><p>Trials were averaged starting with the first (i.e., reinstatement-test and unconditioned stimulus [US] trials) or second trial (i.e., acquisition and extinction training). All preceding trials were added trial-by-trial and averaged. Intraclass correlation coefficients (ICCs) <0.5, <0.75, <0.9, and >0.9 (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>) were interpreted as poor, moderate, good, and excellent, respectively. Error bars represent 95% confidence intervals. Non-overlapping error bars indicate significant differences between ICCs within one figure. 
RI-Test = reinstatement-test.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1-figsupp3.jpg"/></fig><fig id="fig1s4" position="float" specific-use="child-fig"><label>Figure 1—figure supplement 4.</label><caption><title>Illustration of ICC<sub>con</sub> of trial-by-trial raw skin conductance responses (SCRs) for phases (A–D: Acquisition, E–G: Extinction, H–J: Reinstatement-Test, K: Reinstatement) and stimulus types separately.</title><p>Trials were averaged starting with the first (i.e., reinstatement-test and unconditioned stimulus [US] trials) or second trial (i.e., acquisition and extinction training). All preceding trials were added trial-by-trial and averaged. Intraclass correlation coefficients (ICCs) <0.5, <0.75, <0.9, and >0.9 (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>) were interpreted as poor, moderate, good, and excellent, respectively. Error bars represent 95% confidence intervals. Non-overlapping error bars indicate significant differences between ICCs within one figure. RI-Test = reinstatement-test.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1-figsupp4.jpg"/></fig><fig id="fig1s5" position="float" specific-use="child-fig"><label>Figure 1—figure supplement 5.</label><caption><title>Illustration of ICC<sub>abs</sub> of trial-by-trial log-transformed skin conductance responses (SCRs) for phases (A–D: Acquisition, E–G: Extinction, H–J: Reinstatement-Test, K: Reinstatement) and stimulus types separately.</title><p>Trials were averaged starting with the first (i.e., reinstatement-test and unconditioned stimulus [US] trials) or second trial (i.e., acquisition and extinction training). All preceding trials were added trial-by-trial and averaged. Intraclass correlation coefficients (ICCs) <0.5, <0.75, <0.9, and >0.9 (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>) were interpreted as poor, moderate, good, and excellent, respectively. 
Error bars represent 95% confidence intervals. Non-overlapping error bars indicate significant differences between ICCs within one figure. RI-Test = reinstatement-test.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1-figsupp5.jpg"/></fig><fig id="fig1s6" position="float" specific-use="child-fig"><label>Figure 1—figure supplement 6.</label><caption><title>Illustration of ICC<sub>con</sub> of trial-by-trial log-transformed skin conductance responses (SCRs) for phases (A–D: Acquisition, E–G: Extinction, H–J: Reinstatement-Test, K: Reinstatement) and stimulus types separately.</title><p>Trials were averaged starting with the first (i.e., reinstatement-test and unconditioned stimulus [US] trials) or second trial (i.e., acquisition and extinction training). All preceding trials were added trial-by-trial and averaged. Intraclass correlation coefficients (ICCs) <0.5, <0.75, <0.9, and >0.9 (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>) were interpreted as poor, moderate, good and excellent, respectively. Error bars represent 95% confidence intervals. Non-overlapping error bars indicate significant differences between ICCs within one figure. RI-Test = reinstatement-test.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1-figsupp6.jpg"/></fig><fig id="fig1s7" position="float" specific-use="child-fig"><label>Figure 1—figure supplement 7.</label><caption><title>Illustration of ICC<sub>abs</sub> of trial-by-trial log-transformed and range corrected skin conductance responses (SCRs) for phases (A–D: Acquisition, E–G: Extinction, H–J: Reinstatement-Test, K: Reinstatement) and stimulus types separately.</title><p>Trials were averaged starting with the first (i.e., reinstatement-test and unconditioned stimulus [US] trials) or second trial (i.e., acquisition and extinction training). All preceding trials were added trial-by-trial and averaged. 
Intraclass correlation coefficients (ICCs) <0.5, <0.75, <0.9, and >0.9 (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>) were interpreted as poor, moderate, good, and excellent, respectively. Error bars represent 95% confidence intervals. Non-overlapping error bars indicate significant differences between ICCs within one figure. RI-Test = reinstatement-test.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1-figsupp7.jpg"/></fig><fig id="fig1s8" position="float" specific-use="child-fig"><label>Figure 1—figure supplement 8.</label><caption><title>Illustration of ICC<sub>con</sub> of trial-by-trial log-transformed and range corrected skin conductance responses (SCRs) for phases (A–D: Acquisition, E–G: Extinction, H–J: Reinstatement-Test, K: Reinstatement) and stimulus types separately.</title><p>Trials were averaged starting with the first (i.e., reinstatement-test and unconditioned stimulus [US] trials) or second trial (i.e., acquisition and extinction training). All preceding trials were added trial-by-trial and averaged. Intraclass correlation coefficients (ICCs) <0.5, <0.75, <0.9, and >0.9 (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>) were interpreted as poor, moderate, good, and excellent, respectively. Error bars represent 95% confidence intervals. Non-overlapping error bars indicate significant differences between ICCs within one figure. 
RI-Test = reinstatement-test.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig1-figsupp8.jpg"/></fig></fig-group></sec><sec id="s2-2"><title>Longitudinal reliability at the individual level</title><p>Longitudinal reliability at the individual level refers to the time stability of individual responses which we assessed through several measures (see <xref ref-type="table" rid="table1">Table 1</xref>).</p><p>As a first measure, absolute agreement ICCs (ICC<sub>abs</sub>) and consistency ICCs (ICC<sub>con</sub>) were calculated across both time points (T0, T1) for all data specifications (see <xref ref-type="fig" rid="fig1">Figure 1</xref>) while for BOLD fMRI these were only calculated for CS discrimination (see Materials and methods for justification). While ICC<sub>abs</sub> refers to the extent to which measurements at T0 correspond with measurements at T1 in absolute terms, ICC<sub>con</sub> allows for deviations at T1 due to systematic error (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>).</p><p>Note that internal consistency and ICCs for SCRs are shown for raw data only. Results of log-transformed as well as log-transformed and range corrected data are presented in <xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1</xref> and <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref> for completeness.</p><sec id="s2-2-1"><title>SCR and fear ratings</title><p>Across data specifications, ICC<sub>abs</sub> and ICC<sub>con</sub> ranged from 0.03 to 0.58 and 0.03 to 0.61 for SCRs and from −0.16 to 0.70 as well as from −0.19 to 0.70 for fear ratings, respectively (see <xref ref-type="fig" rid="fig1">Figure 1</xref>, for detailed results see also <xref ref-type="supplementary-material" rid="supp3">Supplementary file 3</xref> and <xref ref-type="supplementary-material" rid="supp4">Supplementary file 4</xref>). 
ICCs for log-transformed and raw SCRs were similar (see <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2A-B</xref>) while log-transformation and range correction resulted in increased reliability for some data specifications (e.g., CS+ and CS- responses averaged across acquisition training, see <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2C-D</xref>) but in reduced reliability for others (e.g., CS- responses during fear recall, i.e., the first extinction trial).</p><p>Exploratory, non-pre-registered analyses of trial-by-trial SCRs revealed, overall, only minor changes in ICCs upon stepwise inclusion of additional SCR trials (see <xref ref-type="fig" rid="fig1s3">Figure 1—figure supplements 3</xref>–<xref ref-type="fig" rid="fig1s8">8</xref>) with few exceptions: Including more trials resulted in an increase of ICC point estimates for SCRs to the CS+ and CS− during acquisition (log-transformed and range corrected data) and extinction training (all transformation types). Note, however, that this was – at large – only statistically significant when comparing ICCs based on the first (i.e., single trial at T0 and T1) and the maximum number of trials (as indicated by non-overlapping 95% confidence interval [CI] error bars). Interestingly, ICC point estimates for reinstatement-test (all transformation types) were numerically lower with an increasing number of trials, likely because of the transitory nature of the reinstatement effect (<xref ref-type="bibr" rid="bib45">Haaker et al., 2014</xref>).</p></sec><sec id="s2-2-2"><title>BOLD fMRI</title><p>For BOLD fMRI, both ICC types suggest rather limited reliability for CS discrimination during acquisition (both ICC<sub>abs</sub> and ICC<sub>con</sub> = 0.17) and extinction training (both ICC<sub>abs</sub> and ICC<sub>con</sub> = 0.01). 
For individual regions of interest (ROIs: anterior insula, amygdala, hippocampus, caudate nucleus, putamen, pallidum, nucleus accumbens [NAcc], thalamus, dorsal anterior cingulate cortex [dACC], dorsolateral prefrontal cortex [dlPFC], and ventromedial prefrontal cortex [vmPFC]), ICCs were even lower (all ICCs ≤0.001; for full results see <xref ref-type="supplementary-material" rid="supp5">Supplementary file 5</xref>).</p></sec></sec><sec id="s2-3"><title>Higher within- than between-subject similarity in BOLD fMRI but not SCRs</title><p>While ICCs provide information on the absolute quantity of longitudinal reliability at the individual level, comparison of within- and between-subject similarity as a complementary measure of longitudinal reliability at the individual level (see <xref ref-type="table" rid="table1">Table 1</xref>) reflects the extent to which responses in SCR and BOLD activation of one individual at T0 were more similar to themselves at T1 than to other individuals at T1 (see <xref ref-type="fig" rid="fig2">Figures 2</xref> and <xref ref-type="fig" rid="fig3">3</xref>).</p><fig id="fig2" position="float"><label>Figure 2.</label><caption><title>Illustration of within- and between-subject similarity for raw skin conductance responses (SCRs) during (<bold>A</bold>) acquisition and (<bold>B</bold>) extinction training separately for CS discrimination (gray), CS+ (red), CS− (blue), and unconditioned stimulus (US) responses (yellow).</title><p>Results for log-transformed as well as log-transformed and range corrected SCRs were almost identical to the results from raw data and are hence not reported here. 
Single data points represent Fisher <italic>r</italic>-to-<italic>z</italic> transformed correlations between single trial SCRs of each subject at T0 and T1 (within-subject similarity) or averaged <italic>r</italic>-to-<italic>z</italic> transformed correlations between single trial SCRs of one subject at T0 and all other subjects at T1 (between-subject similarity). Triangles represent mean correlations, corresponding error bars represent 95% confidence intervals. Boxes of boxplots represent the interquartile range (IQR) crossed by the median as bold line, ends of whiskers represent the minimum/maximum value in the data within the range of 25th/75th percentiles ±1.5 IQR. Distributions of the data are illustrated by densities next to the boxplots. One data point had a similarity above 3.5 (within-subject similarity of SCRs to the CS+) and is not shown in the figure. *p < 0.05. Note that the variances differ strongly between within- and between-subject similarity because between-subject similarity is based on correlations averaged across subjects, whereas within-subject similarity is based on non-averaged correlations calculated for each subject. Note also that similarity calculations were based on different sample sizes for acquisition and extinction training and CS discrimination as well as SCRs to the CS+, CS−, and US, respectively (for details, see Materials and methods). 
within-sub = within-subject; between-sub = between-subject.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig2.jpg"/></fig><fig id="fig3" position="float"><label>Figure 3.</label><caption><title>Acquisition (<bold>A</bold>) and extinction (<bold>B</bold>) training within- and between-subject similarities (Fisher <italic>r</italic>-to-<italic>z</italic> transformed) of voxel-wise brain activation patterns (based on beta maps) for CS discrimination at T0 and T1 for the whole brain and different regions of interest (ROIs).</title><p>Triangles represent mean correlations, corresponding error bars represent 95% confidence intervals. Single data points represent Fisher <italic>r</italic>-to-<italic>z</italic> transformed correlations between the first-level response patterns of brain activation of each subject at T0 and T1 (within-subject similarity) or averaged <italic>r</italic>-to-<italic>z</italic> transformed correlations between the first-level response patterns of brain activation of one subject at T0 and all other subjects at T1 (between-subject similarity). Boxes of boxplots represent the interquartile range (IQR) crossed by the median as bold line, ends of whiskers represent the minimum/maximum value in the data within the range of 25th/75th percentiles ±1.5 IQR. Distributions of the data are illustrated with densities next to the boxplots. fMRI data for the reinstatement-test were not analyzed in the current study since data from a single trial do not provide sufficient power. *p &lt; 0.05, **p &lt; 0.01, ***p &lt; 0.001. 
NAcc = nucleus accumbens; dACC = dorsal anterior cingulate cortex; dlPFC = dorsolateral prefrontal cortex; vmPFC = ventromedial prefrontal cortex; within-sub = within-subject; between-sub = between-subject.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig3.jpg"/></fig><sec id="s2-3-1"><title>SCR</title><p>For SCRs, within-subject similarity (i.e., within-subject correlation of trial-by-trial SCR across time points) and between-subject similarity (i.e., correlation of trial-by-trial SCR between one individual at T0 and all other individuals at T1; see <xref ref-type="fig" rid="fig2">Figure 2</xref>) did not differ significantly for most data specifications. This was true for CS discrimination (<italic>t</italic>(64) = 1.78, p = 0.079, <italic>d</italic> = 0.22) as well as for SCRs to the CS+ (<italic>t</italic>(61) = 0.84, p = 0.407, <italic>d</italic> = 0.11) and CS− (<italic>t</italic>(55) = 1.50, p = 0.138, <italic>d</italic> = 0.20) during acquisition training and for CS discrimination (<italic>t</italic>(44) = −0.23, p = 0.823, <italic>d</italic> = −0.03) and SCRs to the CS+ (<italic>t</italic>(39) = 0.25, p = 0.801, <italic>d</italic> = 0.04) during extinction training. This indicates that SCRs of one particular individual at T0 were mostly not more similar to their own SCRs than to those of other individuals at T1. The only exceptions where within-subject similarities were significantly higher than between-subject similarity were SCRs to the US during acquisition training (<italic>t</italic>(70) = 2.54, p = 0.013, <italic>d</italic> = 0.30) and to the CS− during extinction training (<italic>t</italic>(31) = 2.05, p = 0.049, <italic>d</italic> = 0.36). 
Note, however, that within-subject similarity had a very wide spread, pointing to substantial individual differences (while this variance is removed in calculations of between-subject similarity).</p></sec><sec id="s2-3-2"><title>fMRI data</title><p>In contrast to what was observed for SCRs, within-subject similarity was significantly higher than between-subject similarity in the whole brain (p &lt; 0.001) and most of the ROIs for fear acquisition training (see <xref ref-type="fig" rid="fig3">Figure 3A</xref> and <xref ref-type="supplementary-material" rid="supp6">Supplementary file 6</xref>). This suggests that while absolute values for similarity might be low, individual brain activation patterns during fear acquisition training at T0 were – by and large – still more similar to the same subject’s activation pattern at T1 than to that of any other subject at T1. For extinction training, however, no significant differences between within- and between-subject similarity were found for any ROI or the whole brain (all p’s > 0.306; see <xref ref-type="fig" rid="fig3">Figure 3B</xref> and <xref ref-type="supplementary-material" rid="supp6">Supplementary file 6</xref>).</p></sec></sec><sec id="s2-4"><title>Low overlap at the individual level between both time points</title><p>As opposed to similarity measures (see above) which reflect the correlation of activated voxels between time points, overlap at the individual level denotes the degree of overlap of significantly activated voxels.</p><p>The overlap at the individual level was low with the Jaccard coefficient indicating 7.60% and 0.70% whole brain overlap for acquisition and extinction training, respectively (see <xref ref-type="table" rid="table2">Table 2A</xref>). 
Of note, individual values ranged from 0% to 39.65% overlap during acquisition, suggesting large interindividual differences in overlap.</p><p>While overlap during acquisition for individual ROIs was comparable to the whole brain, Jaccard and Dice coefficients indicate close to 0 overlap at extinction (see <xref ref-type="table" rid="table2">Table 2A</xref>).</p><table-wrap id="table2" position="float"><label>Table 2.</label><caption><title>Overlap in significantly activated voxels at the individual and group level across both time points for CS discrimination.</title></caption><table frame="hsides" rules="groups"><thead><tr><th align="left" valign="bottom" rowspan="2">Level</th><th align="left" valign="bottom" rowspan="2">Phase</th><th align="left" valign="bottom" rowspan="2">Coeff.</th><th align="center" valign="bottom" colspan="12">ROI</th></tr><tr><th align="left" valign="bottom">Whole brain</th><th align="left" valign="bottom">Insula</th><th align="left" valign="bottom">Amygdala</th><th align="left" valign="bottom">Hippocampus</th><th align="left" valign="bottom">Caudate</th><th align="left" valign="bottom">Putamen</th><th align="left" valign="bottom">Pallidum</th><th align="left" valign="bottom">Accumbens</th><th align="left" valign="bottom">Thalamus</th><th align="left" valign="bottom">dACC</th><th align="left" valign="bottom">dlPFC</th><th align="left" valign="bottom">vmPFC</th></tr></thead><tbody><tr><td align="left" valign="bottom" rowspan="4">(A) Individual</td><td align="left" valign="bottom" rowspan="2">Acq</td><td align="left" valign="bottom">Jaccard</td><td align="char" char="." valign="bottom">0.076</td><td align="char" char="." valign="bottom">0.075</td><td align="char" char="." valign="bottom">0.011</td><td align="char" char="." valign="bottom">0.012</td><td align="char" char="." valign="bottom">0.039</td><td align="char" char="." valign="bottom">0.037</td><td align="char" char="." valign="bottom">0.018</td><td align="char" char="." 
valign="bottom">0.017</td><td align="char" char="." valign="bottom">0.033</td><td align="char" char="." valign="bottom">0.132</td><td align="char" char="." valign="bottom">0.080</td><td align="char" char="." valign="bottom">0.039</td></tr><tr><td align="left" valign="bottom">Dice</td><td align="char" char="." valign="bottom">0.131</td><td align="char" char="." valign="bottom">0.121</td><td align="char" char="." valign="bottom">0.018</td><td align="char" char="." valign="bottom">0.021</td><td align="char" char="." valign="bottom">0.057</td><td align="char" char="." valign="bottom">0.058</td><td align="char" char="." valign="bottom">0.029</td><td align="char" char="." valign="bottom">0.024</td><td align="char" char="." valign="bottom">0.055</td><td align="char" char="." valign="bottom">0.189</td><td align="char" char="." valign="bottom">0.118</td><td align="char" char="." valign="bottom">0.061</td></tr><tr><td align="left" valign="bottom" rowspan="2">Ext</td><td align="left" valign="bottom">Jaccard</td><td align="char" char="." valign="bottom">0.007</td><td align="char" char="." valign="bottom">0.001</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.001</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.001</td><td align="char" char="." valign="bottom">0.003</td><td align="char" char="." valign="bottom">0.001</td><td align="char" char="." valign="bottom">0.005</td></tr><tr><td align="left" valign="bottom">Dice</td><td align="char" char="." valign="bottom">0.014</td><td align="char" char="." valign="bottom">0.001</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.001</td><td align="char" char="." 
valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.001</td><td align="char" char="." valign="bottom">0.001</td><td align="char" char="." valign="bottom">0.006</td><td align="char" char="." valign="bottom">0.002</td><td align="char" char="." valign="bottom">0.009</td></tr><tr><td align="left" valign="bottom" rowspan="4">(B) Group</td><td align="left" valign="bottom" rowspan="2">Acq</td><td align="left" valign="bottom">Jaccard</td><td align="char" char="." valign="bottom">0.620</td><td align="char" char="." valign="bottom">0.595</td><td align="char" char="." valign="bottom">0.294</td><td align="char" char="." valign="bottom">0.323</td><td align="char" char="." valign="bottom">0.613</td><td align="char" char="." valign="bottom">0.740</td><td align="char" char="." valign="bottom">0.747</td><td align="char" char="." valign="bottom">0.441</td><td align="char" char="." valign="bottom">0.834</td><td align="char" char="." valign="bottom">0.898</td><td align="char" char="." valign="bottom">0.895</td><td align="char" char="." valign="bottom">0.045</td></tr><tr><td align="left" valign="bottom">Dice</td><td align="char" char="." valign="bottom">0.765</td><td align="char" char="." valign="bottom">0.745</td><td align="char" char="." valign="bottom">0.448</td><td align="char" char="." valign="bottom">0.472</td><td align="char" char="." valign="bottom">0.760</td><td align="char" char="." valign="bottom">0.847</td><td align="char" char="." valign="bottom">0.855</td><td align="char" char="." valign="bottom">0.595</td><td align="char" char="." valign="bottom">0.910</td><td align="char" char="." valign="bottom">0.946</td><td align="char" char="." valign="bottom">0.944</td><td align="char" char="." valign="bottom">0.086</td></tr><tr><td align="left" valign="bottom" rowspan="2">Ext</td><td align="left" valign="bottom">Jaccard</td><td align="char" char="." valign="bottom">0.057</td><td align="char" char="." 
valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.044</td><td align="char" char="." valign="bottom">0.014</td><td align="char" char="." valign="bottom">0.000</td></tr><tr><td align="left" valign="bottom">Dice</td><td align="char" char="." valign="bottom">0.108</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.000</td><td align="char" char="." valign="bottom">0.085</td><td align="char" char="." valign="bottom">0.028</td><td align="char" char="." valign="bottom">0.000</td></tr></tbody></table><table-wrap-foot><fn><p><bold>Note</bold>. Results are shown for the whole brain as well as for selected regions of interest (ROIs) for fear acquisition training and extinction training. Both coefficients range from 0 (no overlap) to 1 (perfect overlap). Note that the Jaccard can be interpreted as % (<xref ref-type="bibr" rid="bib73">Maitra, 2010</xref>). 
NAcc = nucleus accumbens; dACC = dorsal anterior cingulate cortex; dlPFC = dorsolateral prefrontal cortex; vmPFC = ventromedial prefrontal cortex.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-5"><title>Robust longitudinal reliability at the group level</title><p>While longitudinal reliability at the individual level relies on (mean) individual subject responding at both time points, longitudinal reliability at the group level relies on the percentage of explained variance of group averaged trials at T1 by group averaged trials at T0 (i.e., <italic>R</italic> squared for SCR) or the degree of group level overlap of significant voxels expressed as Dice and Jaccard indices (i.e., BOLD fMRI).</p><sec id="s2-5-1"><title>SCR</title><p>For acquisition training (see <xref ref-type="fig" rid="fig4">Figure 4A</xref>), 40.66% (<inline-formula><mml:math id="inf1"><mml:mi>F</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mn>1,11</mml:mn></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mn>7.54</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="inf2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mo>=</mml:mo><mml:mn>0.019</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>), 63.59% (<inline-formula><mml:math id="inf3"><mml:mi>F</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mn>1,11</mml:mn></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mn>19.21</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="inf4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mo>=</mml:mo><mml:mn>0.001</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>) and 75.67% (<inline-formula><mml:math id="inf5"><mml:mi>F</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mn>1,11</mml:mn></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mn>34.20</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="inf6"><mml:mstyle 
displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mo>&lt;</mml:mo><mml:mn>0.001</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>) of the variance of SCRs at T1 could be explained by SCRs at T0 for CS discrimination, CS+ and CS−, respectively, indicating robust longitudinal reliability of SCRs at the group level for CS responding during acquisition. Interestingly, only 19.53% (<inline-formula><mml:math id="inf7"><mml:mi>F</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mn>1,12</mml:mn></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mn>2.91</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="inf8"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mo>=</mml:mo><mml:mn>0.114</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>) of the variance of SCRs to the US could be explained. For extinction training, in contrast, only 19.58% (<inline-formula><mml:math id="inf9"><mml:mi>F</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mn>1,11</mml:mn></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mn>2.68</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="inf10"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mo>=</mml:mo><mml:mn>0.130</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>) and 21.70% (<inline-formula><mml:math id="inf11"><mml:mi>F</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mn>1,11</mml:mn></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mn>3.05</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="inf12"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mo>=</mml:mo><mml:mn>0.109</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>) of the SCR variance at T1 could be explained by SCRs at T0 for CS discrimination and CS+, respectively, indicating only limited longitudinal 
reliability at the group level. However, with 67.35% (<inline-formula><mml:math id="inf13"><mml:mi>F</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mn>1,11</mml:mn></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mn>22.69</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="inf14"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mo>=</mml:mo><mml:mn>0.001</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>) explained variance at T1, longitudinal reliability of SCRs to the CS− appeared to be more robust as compared to CS discrimination and responses to the CS+ (see <xref ref-type="fig" rid="fig4">Figure 4B</xref>).</p><fig-group><fig id="fig4" position="float"><label>Figure 4.</label><caption><title>Scatter plots illustrating longitudinal reliability at the group level during (<bold>A</bold>) acquisition and (<bold>B</bold>) extinction training for raw skin conductance responses (SCRs) (in μS).</title><p>Results for log-transformed as well as log-transformed and range corrected data are presented in <xref ref-type="fig" rid="fig4s1">Figure 4—figure supplement 1</xref>. Longitudinal reliability at the group level refers to the extent of explained variance in linear regressions comprising SCRs at T0 as independent and SCRs at T1 as dependent variable. Results are shown for trial-by-trial group average SCRs to the CS+ (red), CS− (blue), the unconditioned stimulus (US; yellow), and CS discrimination (black). Single data points represent pairs of single trials at T0 and T1 averaged across participants. 
Note that no US was presented during extinction training and hence, no reliability of the US is shown in (<bold>B</bold>).</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig4.jpg"/></fig><fig id="fig4s1" position="float" specific-use="child-fig"><label>Figure 4—figure supplement 1.</label><caption><title>Scatter plots illustrating longitudinal reliability at the group level during (<bold>A, C</bold>) acquisition and (<bold>B, D</bold>) extinction training for log-transformed (<bold>A, B</bold>) as well as log-transformed and range corrected (<bold>C, D</bold>) skin conductance responses (SCRs).</title><p>Longitudinal reliability at the group level refers to the explained variance in linear regressions comprising SCRs at T0 as independent and SCRs at T1 as dependent variable. Results are shown for trial-by-trial group average SCRs to the CS+ (red), CS− (blue), the unconditioned stimulus (US; yellow), and CS discrimination (black). Single data points represent pairs of single trials at T0 and T1 averaged across participants. Note that no US was presented during extinction training and hence, no reliability of the US is shown in (<bold>B</bold>) and (<bold>D</bold>).</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig4-figsupp1.jpg"/></fig></fig-group></sec><sec id="s2-5-2"><title>BOLD fMRI</title><p>In stark contrast to the low overlap of individual-level activation (see <xref ref-type="table" rid="table2">Table 2A</xref>), the overlap at the group level was rather high with 62.00% for the whole brain and up to 89.80% for individual ROIs (i.e., dACC and dlPFC; Jaccard) for CS discrimination during acquisition training (see <xref ref-type="table" rid="table2">Table 2B</xref>). 
Similar to what was observed for overlap at the individual level, a much lower overlap for extinction training as compared to acquisition training was observed for the whole brain (5.70% overlap) and all ROIs (all close to zero).</p></sec></sec><sec id="s2-6"><title>Cross-phases predictability of conditioned responding</title><p>Finally, we investigated if responding in any given experimental phase predicted responding in subsequent experimental phases. To this end, simple linear regressions with robust standard errors were computed for both SCRs and fear ratings and all data specifications (see <xref ref-type="fig" rid="fig5">Figure 5</xref> and <xref ref-type="supplementary-material" rid="supp7">Supplementary file 7</xref>, <xref ref-type="supplementary-material" rid="supp8">Supplementary file 8</xref>). To approximate these analyses, correlations of patterns of BOLD brain activation between experimental phases were calculated (see <xref ref-type="fig" rid="fig6">Figure 6</xref>).</p><fig-group><fig id="fig5" position="float"><label>Figure 5.</label><caption><title>Illustration of standardized betas derived from regressions including skin conductance responses (SCRs) (<bold>A</bold>) and fear ratings (<bold>B</bold>) for all data specifications.</title><p>Colored cells indicate statistical significance of standardized betas, non-colored cells indicate non-significance. Standardized betas are color coded for their direction and magnitude showing positive values from yellow to red and negative values from light blue to dark blue. Darker colors indicate higher betas. On the <italic>y</italic>-axis, the following data specifications are plotted from left to right: CS type, ranking of the data and transformation of the data. On the <italic>x</italic>-axis, the following information is plotted: Number of the columns for better orientation, predictor, and criterion included in the regression. 
For example, the beta value at the top left in (<bold>A</bold>) (i.e., 0.196) is the standardized beta as retrieved from the linear regression including CS discrimination in non-ranked and raw SCRs during average acquisition as predictor and the first extinction trial as criterion. For exploratory non-preregistered regressions including a small manyverse of approximations of SCR extinction training learning rates, see <xref ref-type="fig" rid="fig5s1">Figure 5—figure supplement 1</xref>. Tables containing regression parameters beyond the standardized betas depicted in panels A and B are presented in <xref ref-type="supplementary-material" rid="supp7">Supplementary file 7</xref> and <xref ref-type="supplementary-material" rid="supp8">Supplementary file 8</xref>. AVE = average, LOG = log-transformed data, LOG.RC = log-transformed and range corrected data, not ordinal = not ordinally ranked data, ordinal = ordinally ranked data.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig5.jpg"/></fig><fig id="fig5s1" position="float" specific-use="child-fig"><label>Figure 5—figure supplement 1.</label><caption><title>As per reviewer’s request, we illustrate standardized betas derived from non-pre-registered regressions including skin conductance response (SCR) extinction training learning rates (LR EXT).</title><p>As there is no agreed upon approach, we provide a small manyverse of approximations of extinction learning rates. We subtracted (1) the last extinction trial from the first extinction trial (i.e., for CS discrimination during the first and last trial, for CS+ and for CS−, respectively; LR EXT 1, columns 1–3), (2) the last two extinction trials from the first two extinction trials (LR EXT 2, columns 4–6), (3) the last quarter of trials from the first quarter of trials (i.e., four trials; LR EXT 4, columns 7–9), and (4) the last half from the first half of trials (i.e., seven trials; LR EXT H, columns 10–12). 
We acknowledge that learning rates have been inferred through different approaches in the literature (see e.g., <xref ref-type="bibr" rid="bib84">Ney et al., 2020</xref>; <xref ref-type="bibr" rid="bib85">Ney et al., 2022</xref>) and are often calculated from model-based approaches such as the Rescorla–Wagner model (<xref ref-type="bibr" rid="bib108">Seel, 2012</xref>), and hence our operationalizations are only four out of multiple equally justifiable options. Colored cells indicate statistical significance of standardized betas, non-colored cells indicate non-significance. Standardized betas are color coded for their direction and magnitude showing positive values from yellow to red and negative values from light blue to dark blue. Darker colors indicate higher betas. AVE = average, LOG = log-transformed data, LOG.RC = log-transformed and range corrected data, not ordinal = not ordinally ranked data, ordinal = ordinally ranked data.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig5-figsupp1.jpg"/></fig></fig-group><fig id="fig6" position="float"><label>Figure 6.</label><caption><title>Illustration of standardized betas derived from correlation analyses between brain activation patterns during acquisition and extinction training in different regions of interest (ROIs) and different data specifications.</title><p>Standardized betas are color coded for their direction and magnitude showing positive values from yellow to red and negative values from light blue to dark blue. Darker colors indicate higher betas. 
NAcc = nucleus accumbens; dACC = dorsal anterior cingulate cortex; dlPFC = dorsolateral prefrontal cortex; vmPFC = ventromedial prefrontal cortex.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig6.jpg"/></fig><sec id="s2-6-1"><title>SCR</title><p>Stronger CS discrimination in SCRs during (delayed) fear recall (i.e., first trial of extinction training) was significantly predicted by both average and end-point performance (i.e., last two trials) during acquisition training for most data specifications (<xref ref-type="fig" rid="fig5">Figure 5A</xref>, columns 1 and 2). In contrast, average CS discrimination during extinction training was significantly predicted by acquisition training performance only if data were ordinally ranked (columns 3 and 4). Strikingly, all predictions of extinction end-point performance (columns 5 and 6) as well as performance at reinstatement-test (columns 7–11) were non-significant – irrespective of phase operationalizations and data transformation.</p><p>The majority of predictions of SCRs to the CS+ and CS− were significant with few exceptions (see white cells in <xref ref-type="fig" rid="fig5">Figure 5A</xref>) – irrespective of experimental phases, their operationalization and data transformation. Most non-significant regressions included log-transformed and range corrected data. Strikingly, extinction end-point performance never predicted performance at reinstatement-test – irrespective of data transformation (column 11).</p></sec><sec id="s2-6-2"><title>Fear ratings</title><p>Higher ratings for the CS+ as well as higher CS discrimination during acquisition training predicted higher CS+ ratings and CS discrimination at fear recall (<xref ref-type="fig" rid="fig5">Figure 5B</xref>, columns 1 and 2), extinction training (columns 3 and 4), and at reinstatement-test (columns 7 and 8). 
Higher responding to the CS+ and higher CS discrimination at fear recall predicted higher responding at reinstatement-test (column 9) – irrespective of data transformations. In contrast, predictions of CS discrimination and CS+ ratings after extinction training were mostly non-significant (columns 5 and 6). Higher CS+ ratings during extinction training significantly predicted higher ratings at reinstatement-test which was not true for CS discrimination (columns 10 and 11).</p><p>Higher CS− ratings after acquisition training predicted higher CS− ratings at fear recall as well as after extinction training and CS− ratings after extinction training predicted the performance at reinstatement-test – irrespective of ranking of the data (columns 2, 6, and 11). Furthermore, when based on ordinally ranked data, the difference between ratings prior to and after acquisition predicted CS− ratings at fear recall and CS− ratings after acquisition training predicted the difference between CS− ratings prior to and after extinction training (columns 1 and 4). All other predictions were non-significant.</p><p>In sum, all significant predictions observed were positive with weak to moderate associations and indicate that higher responding in preceding phases predicted higher responding in subsequent phases for both SCRs and fear ratings.</p></sec><sec id="s2-6-3"><title>BOLD fMRI</title><p>In short, all but one association (CS discrimination in the NAcc) were positive, showing that higher BOLD responding during acquisition was associated with higher BOLD responding during extinction training (see <xref ref-type="fig" rid="fig6">Figure 6</xref>). However, the standardized beta coefficients are mostly below or around 0.3 except for CS+ associations in the dACC, indicating non-substantial associations for all ROIs and CS specifications that were near absent for CS discrimination. 
Analysis of CS+ and CS− data was included here as the analysis is based on beta maps and not T-maps (as in previous analyses) where a contrast against baseline is not optimal.</p></sec></sec><sec id="s2-7"><title>Cross-phases predictability depends on data specifications</title><p>Pooled across all other data specifications, some interesting patterns can be extracted: First, standardized betas were significantly lower for raw (<italic>t</italic>(65) = 8.08, p &lt; 0.001, <italic>d</italic> = 0.99) and log-transformed (<italic>t</italic>(65) = 8.26, p &lt; 0.001, <italic>d</italic> = 1.02) as compared to log-transformed and range corrected SCRs while standardized betas derived from the former two did not differ significantly (<italic>t</italic>(65) = −0.26, p = 0.794, <italic>d</italic> = −0.03). Second, standardized betas derived from ranked and non-ranked analyses were comparable for fear ratings (<italic>t</italic>(32) = 1.26, p = 0.218, <italic>d</italic> = 0.22) but not for SCRs with significantly higher betas for non-ranked as opposed to ranked SCRs (<italic>t</italic>(98) = 2.37, p = 0.020, <italic>d</italic> = 0.24). Third, standardized betas for CS discrimination were significantly lower than for CS+ and CS− for both SCRs (CS+: <italic>t</italic>(65) = −15.31, p &lt; 0.001, <italic>d</italic> = −1.88 and CS−: <italic>t</italic>(65) = −12.34, p &lt; 0.001, <italic>d</italic> = −1.52) and BOLD fMRI (CS+: <italic>t</italic>(11) = −4.65, p &lt; 0.001, <italic>d</italic> = −1.34 and CS−: <italic>t</italic>(11) = −3.05, p = 0.011, <italic>d</italic> = −0.88), while for ratings, standardized betas for CS discrimination were higher than for the CS− (<italic>t</italic>(21) = 3.11, p = 0.005, <italic>d</italic> = 0.66) and comparable to those for the CS+ (<italic>t</italic>(21) = −0.57, p = 0.572, <italic>d</italic> = −0.12). 
Furthermore, standardized betas were larger for the CS+ than for the CS− for SCRs (<italic>t</italic>(65) = 3.79, p &lt; 0.001, <italic>d</italic> = 0.47), ratings (<italic>t</italic>(21) = 3.12, p = 0.005, <italic>d</italic> = 0.67) and BOLD fMRI (<italic>t</italic>(11) = 4.34, p = 0.001, <italic>d</italic> = 1.25). Fourth, standardized betas derived from regressions predicting fear recall were significantly higher than for reinstatement-test for both SCRs (<italic>t</italic>(124) = 4.35, p &lt; 0.001, <italic>d</italic> = 0.86) and fear ratings (<italic>t</italic>(40) = 5.15, p &lt; 0.001, <italic>d</italic> = 1.76).</p></sec></sec><sec id="s3" sec-type="discussion"><title>Discussion</title><p>In fear conditioning research, little is known about longitudinal reliability (in the literature often referred to as test–retest reliability) for common outcome measures and almost nothing is known about their internal consistency and to what extent predictability across experimental phases is possible.</p><p>Here, we aimed to fill this gap and complement traditionally used approaches focusing on ICCs (summarized in <xref ref-type="supplementary-material" rid="supp1">Supplementary file 1</xref>) with (1) analyses of response similarity, (2) the degree of overlap of individual-level brain activation patterns as well as (3) by exploring longitudinal reliability at the group level in addition to (4) internal consistency across outcome measures.</p><p>Moreover, we also directly investigated predictability of responding from one experimental phase to subsequent experimental phases. 
For all analyses, we followed a multiverse-inspired approach (<xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>) by taking into account different data specifications.</p><p>Overall, longitudinal group-level reliability was robust for SCRs (see <xref ref-type="fig" rid="fig4">Figure 4</xref>) and the BOLD response (see <xref ref-type="table" rid="table2">Table 2B</xref>) while longitudinal individual-level reliability as assessed by ICCs (see <xref ref-type="fig" rid="fig1">Figure 1C–F</xref>), and individual-level BOLD activation overlap (see <xref ref-type="table" rid="table2">Table 2A</xref>) was more limited across outcome measures and data specifications – particularly during extinction training. This is in line with previous work in fear conditioning (<xref ref-type="bibr" rid="bib17">Cooper et al., 2022b</xref>; <xref ref-type="bibr" rid="bib33">Fredrikson et al., 1993</xref>; <xref ref-type="bibr" rid="bib103">Ridderbusch et al., 2021</xref>; <xref ref-type="bibr" rid="bib122">Torrents-Rodas et al., 2014</xref>; <xref ref-type="bibr" rid="bib139">Zeidan et al., 2012</xref>) reporting figures for longitudinal individual-level reliability comparable to ours across outcome measures (SCRs, fear ratings, BOLD fMRI) and experimental phases. Importantly, however, it remains a challenge to interpret the results as benchmarks for ICCs are derived from psychometric work on trait self-report measures and it is plausible that what is interpreted as ‘low’ and ‘high’ reliability in experimental work should be substantially lower (<xref ref-type="bibr" rid="bib89">Parsons et al., 2019</xref>).</p><p>Our complementary analyses beyond traditional ICCs indicate that SCRs of one individual at T0 were not more similar to responses of the same individual at T1 than compared to others at T1 (see <xref ref-type="fig" rid="fig2">Figure 2</xref>). 
For BOLD fMRI, however, acquisition-related individual BOLD activation patterns at T0 were more similar to their own activation patterns at T1 than to other individuals’ activation patterns (see <xref ref-type="fig" rid="fig3">Figure 3</xref>). This was, however, not the case for extinction. Hence, this may suggest that BOLD fMRI might be more sensitive to detect similarity at individual-level responses within participants than SCRs in our data – maybe due to the dependence on spatial (i.e., voxel-by-voxel) rather than temporal (i.e., trial-by-trial) patterns.</p><p>Furthermore, we observed a few differences in longitudinal reliability at the individual level depending on data processing specifications (see also <xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>). For most data specifications, reliability was slightly higher for log-transformed and range-corrected SCRs (as opposed to raw and only log-transformed data) while – in contrast to what has been shown for other paradigms and outcome measures (<xref ref-type="bibr" rid="bib7">Baker et al., 2021</xref>; see also <ext-link ext-link-type="uri" xlink:href="https://shiny.york.ac.uk/powercontours/">https://shiny.york.ac.uk/powercontours/</ext-link>) – an increasing number of trials included in the calculation of ICCs did not generally improve reliability (see <xref ref-type="fig" rid="fig1s3">Figure 1—figure supplements 3</xref>–<xref ref-type="fig" rid="fig1s8">8</xref>). 
Together, this suggests that longitudinal reliability at the individual level is relatively stable across different data transformations and paradigm specifications (e.g., number of trials within the range used here, i.e., 1 to maximum 14) which is important information facilitating the integration of previous work using different time intervals, reliability indices, and paradigms (see <xref ref-type="supplementary-material" rid="supp1">Supplementary file 1</xref>; <xref ref-type="bibr" rid="bib17">Cooper et al., 2022b</xref>; <xref ref-type="bibr" rid="bib33">Fredrikson et al., 1993</xref>; <xref ref-type="bibr" rid="bib103">Ridderbusch et al., 2021</xref>; <xref ref-type="bibr" rid="bib122">Torrents-Rodas et al., 2014</xref>; <xref ref-type="bibr" rid="bib139">Zeidan et al., 2012</xref>).</p><p>In contrast, we observed quite robust longitudinal reliability at the group level for both SCRs (see <xref ref-type="fig" rid="fig4">Figure 4</xref>) and BOLD fMRI (see <xref ref-type="table" rid="table2">Table 2B</xref>) between both time points with substantial (i.e., up to 90%) overlap in group-level BOLD fMRI activation patterns (whole brain and ROI based) as well as substantial (i.e., up to 76%) explained variance at T1 by variance at T0 for SCRs. However, this was generally only true for acquisition but not extinction training. This pattern of higher reliability during acquisition compared to extinction training has been described in the literature (SCRs: <xref ref-type="bibr" rid="bib33">Fredrikson et al., 1993</xref>; <xref ref-type="bibr" rid="bib139">Zeidan et al., 2012</xref>) and was also evident in the similarity analyses of BOLD fMRI and the group-level reliability of SCRs. 
While this pattern did not emerge across all analyses, it appears to be particularly present when examining reliability of CS discrimination as it was the case for BOLD fMRI and as it also emerged in individual-level reliability analyses of CS discrimination in SCRs (internal consistency and ICCs) and fear ratings (ICCs). Since CS discrimination is typically lower during extinction as compared to acquisition training, this restriction of variance potentially resulted in a floor effect which might have lowered the internal consistency and longitudinal reliability of CS discrimination during extinction training.</p><p>Reports regarding this discrepancy between group- and individual-level longitudinal reliability were recently highlighted for a number of (classic) experimental paradigms (<xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref>; <xref ref-type="bibr" rid="bib49">Hedge et al., 2018</xref>; <xref ref-type="bibr" rid="bib50">Herting et al., 2018</xref>; <xref ref-type="bibr" rid="bib94">Plichta et al., 2012</xref>; <xref ref-type="bibr" rid="bib107">Schümann et al., 2020</xref>). Our results add fear conditioning and extinction as assessed by SCRs and BOLD fMRI to this list and have important implications for translational questions aiming for individual-level predictions – particularly since findings obtained at the group level are not necessarily representative for any individual within the group (<xref ref-type="bibr" rid="bib29">Fisher et al., 2018</xref>).</p><p>In addition to these methods-focused insights, we observed significant associations between responding in different experimental phases for SCR (see <xref ref-type="fig" rid="fig5">Figure 5A</xref>), fear ratings (see <xref ref-type="fig" rid="fig5">Figure 5B</xref>) and BOLD fMRI (see <xref ref-type="fig" rid="fig6">Figure 6</xref>) revealing that higher responses in previous phases were generally modestly associated with higher responses in subsequent phases in all outcome measures. 
However, a remarkable amount of predictions were non-significant – which was particularly true for CS discrimination in SCRs and BOLD fMRI. This may be explained by difference scores (i.e., CS+ minus CS−) being generally less reliable (<xref ref-type="bibr" rid="bib51">Infantolino et al., 2018</xref>; <xref ref-type="bibr" rid="bib72">Lynam et al., 2006</xref>) due to a subtraction of meaningful variance (<xref ref-type="bibr" rid="bib81">Moriarity and Alloy, 2021</xref>) particularly in highly correlated predictors (<xref ref-type="bibr" rid="bib119">Thomas and Zumbo, 2012</xref>). Especially at the end of the extinction, CS discrimination is low and hence, variance limited. Therefore, floor effects may contribute to the non-significant effects for extinction end-point performance.</p><p>Mixed findings in the literature support both the independence of conditioned responding in different experimental phases (<xref ref-type="bibr" rid="bib14">Bouton et al., 2006</xref>; <xref ref-type="bibr" rid="bib93">Plendl and Wotjak, 2010</xref>; <xref ref-type="bibr" rid="bib96">Prenoveau et al., 2013</xref>; <xref ref-type="bibr" rid="bib110">Shumake et al., 2014</xref>) but also their dependence – particularly in clinical samples (<xref ref-type="bibr" rid="bib30">Foa et al., 1983</xref>; <xref ref-type="bibr" rid="bib99">Rauch et al., 2004</xref>; <xref ref-type="bibr" rid="bib105">Rothbaum et al., 2014</xref>; <xref ref-type="bibr" rid="bib113">Smits et al., 2013a</xref>; <xref ref-type="bibr" rid="bib114">Smits et al., 2013b</xref>). These diverging findings in experimental and clinical studies might point toward a translational gap. However, our work may suggest that the strengths of associations between responding in different phases depended on the specific outcome measure and its specifications (e.g., responses specified as CS discrimination, CS+, or CS−). 
Yet another explanation – in particular for predictions spanning a 24 hrs delay in experimental phases – might be that individual differences in consolidation efficacy (e.g., how efficiently the fear and extinction memories are consolidated after performing acquisition and extinction training, respectively) may underlie differences in predictability. For example, the performance during a retention or RoF test phase is considered to be determined by the strength of the fear and extinction memory, respectively. Memory strength, however, is not only determined by the strength of the initially acquired memory but also by its consolidation (discussed in <xref ref-type="bibr" rid="bib68">Lonsdorf et al., 2019b</xref>). Thus, as acquisition training preceded the extinction training and reinstatement-test by 24 hrs, it is highly likely that individual differences in consolidation efficacy also impact on performance at test. This also has implications for the common practice of correcting responses during one experimental phase for responding during preceding experimental phases (discussed in <xref ref-type="bibr" rid="bib68">Lonsdorf et al., 2019b</xref>).</p><p>Importantly, together with our observation of robust internal consistency (see <xref ref-type="fig" rid="fig1">Figure 1</xref> and also <xref ref-type="bibr" rid="bib33">Fredrikson et al., 1993</xref>), this pattern of findings suggests that individual-level predictions at short intervals are plausible but might be more problematic for longer time periods as suggested by the limited stability over time in our data.</p><p>Yet, we would like to point out that the values we report may in fact point toward good and not limited longitudinal individual-level reliability as our interpretation is guided by benchmarks that were not developed for experimental data but from psychometric work on trait self-report measures. 
We acknowledge that the upper bound of maximally observable reliability may differ between both cases of application as empirical neuroscientific research inherently comes with more noise. The problem remains that predictions in fear conditioning paradigms appear to not be meaningful for longer periods of time (~6 months). Thus, a key contribution of our work is that it highlights the need to pay more attention to measurement properties in translational research in general and fear conditioning research specifically (e.g., implement reliability calculations routinely in future studies). To date, it remains an open question what ‘good reliability’ in experimental neuroscientific work actually means (<xref ref-type="bibr" rid="bib89">Parsons et al., 2019</xref>).</p><p>Yet, before discussing implications of our results in detail, some reflections on potential (methodological) reasons for (1) limited individual-level but robust group-level reliability and (2) on the role of time interval lengths deserve attention:</p><p>First, the limited longitudinal individual-level reliability might indicate that the fear conditioning paradigm employed here – which is a rather strong paradigm with 100% reinforcement rate – may be better suited for investigations of group effects and to a lesser extent for individual difference questions – potentially due to limited variance between individuals (<xref ref-type="bibr" rid="bib49">Hedge et al., 2018</xref>; <xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>; <xref ref-type="bibr" rid="bib89">Parsons et al., 2019</xref>). However, high reliability appears to be possible in principle, as we can conclude from the robust internal consistency of SCRs that we observed. This speaks against a limited between-subject variance and a general impracticability of the paradigm for individual difference research. 
Hence, we call for caution and warn against concluding from our report that fear conditioning and our outcome measures (SCRs, BOLD fMRI) are unreliable at the individual level.</p><p>Second, limited individual-level but robust group-level longitudinal reliability might be (in part) due to different averaging procedures which impact error variance (<xref ref-type="bibr" rid="bib54">Kennedy et al., 2021</xref>). More precisely, compared to individual-level data, group-level data are based on highly aggregated data resulting in generally reduced error variance which increases group-level reliability.</p><p>Third, different operationalizations of the same measurement might have different reliabilities (<xref ref-type="bibr" rid="bib59">Kragel et al., 2021</xref>). For instance, amygdala habituation has been shown to be a more reliable measure than average amygdala activation (<xref ref-type="bibr" rid="bib95">Plichta et al., 2014</xref>) and more advanced analytical approaches such as intraindividual neural response variability (<xref ref-type="bibr" rid="bib74">Månsson et al., 2021</xref>) and multivariate imaging techniques (<xref ref-type="bibr" rid="bib59">Kragel et al., 2021</xref>; <xref ref-type="bibr" rid="bib75">Marek et al., 2020</xref>; <xref ref-type="bibr" rid="bib87">Noble et al., 2021</xref>; <xref ref-type="bibr" rid="bib128">Visser et al., 2021</xref>) have been suggested to have better (longitudinal) reliability than more traditional analysis approaches. 
Similarly, methodological advances (e.g., techniques to adjust the functional organization of the brain across participants, <xref ref-type="bibr" rid="bib56">Kong et al., 2021</xref>; or hyperalignment, <xref ref-type="bibr" rid="bib28">Feilong et al., 2021</xref>) in measurement quality and tools may ultimately result in better reliability estimates (<xref ref-type="bibr" rid="bib22">DeYoung et al., 2022</xref>).</p><p>Fourth, as discussed above, caution is warranted as traditional benchmarks for ‘good’ reliability were not developed for experimental work but mainly from psychometric work on trait self-report measures (see above).</p><p>Finally, longitudinal reliability refers to measurements obtained under the same conditions and hence it is both plausible and well established that higher reliability is observed at short test–retest intervals (see also <xref ref-type="bibr" rid="bib87">Noble et al., 2021</xref>; <xref ref-type="bibr" rid="bib129">Werner et al., 2022</xref>). Longer intervals are more susceptible to true changes of the measurand – for instance due to environmental influences such as seasonality, temperature, hormonal status, or life events (see <xref ref-type="bibr" rid="bib116">Specht et al., 2011</xref>; <xref ref-type="bibr" rid="bib124">Vaidya et al., 2002</xref>). Indeed, most longitudinal reliability studies in the fMRI field used shorter intervals (&lt;6 months, see <xref ref-type="bibr" rid="bib26">Elliott et al., 2020</xref>; <xref ref-type="bibr" rid="bib87">Noble et al., 2021</xref>) than our 6-month interval and hence our results should be conceptualized as longitudinal stability rather than a genuine test–retest reliability. The satisfactory internal consistency speaks against excessive noisiness inherent to our measures as a strong noisiness would also be evident in measurements within one time point and not only emerge across our retest interval. 
Thus, we rather suggest a true change of the measurand during our retest interval and hence a potentially stronger state than trait dependency.</p><p>What do our findings imply? Fear conditioning research has been highlighted as a particularly promising paradigm for the translation of neuroscientific findings into the clinics (<xref ref-type="bibr" rid="bib2">Anderson and Insel, 2006</xref>; <xref ref-type="bibr" rid="bib16">Cooper et al., 2022a</xref>; <xref ref-type="bibr" rid="bib36">Fullana et al., 2020</xref>; <xref ref-type="bibr" rid="bib80">Milad and Quirk, 2012</xref>) and some of the most pressing translational questions are based on individual-level predictions such as predicting treatment success. Our results, however, suggest that measurement reliability may allow for individual-level predictions for (very) short but potentially less so for longer time intervals (such as our 6 months retest interval). Importantly, however, robust group-level reliability appears to allow for group-level predictions over longer time intervals. This applies to SCRs and BOLD fMRI in our data but note that the latter was not investigated for fear ratings. 
A potential solution and promising future avenue to make use of both good group-level reliability and individual-level predictions might be the use of homogenous (latent) subgroups characterized by similar response profiles (e.g., rapid, slow or no extinction, <xref ref-type="bibr" rid="bib37">Galatzer-Levy et al., 2013a</xref>) – to exploit the fact that reliability appears to be higher for more homogenous samples (<xref ref-type="bibr" rid="bib44">Gulliksen, 1950</xref>).</p><p>While general recommendations and helpful discussions on the link between reliability and number of trials (<xref ref-type="bibr" rid="bib7">Baker et al., 2021</xref>), statistical power (<xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>), maximally observable correlations (<xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>), sample and effect size (<xref ref-type="bibr" rid="bib49">Hedge et al., 2018</xref>; <xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>) considerations exist, our results highlight the need for field and subdiscipline specific considerations. Our work allows for some initial recommendations and insights. First, we highlight the value of using multiple, more nuanced measures of reliability beyond traditional ICCs (i.e., similarity, overlap, <xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref>) and second, the relation between number of trials and reliability in an experiment with a learning component (i.e., no increase in reliability with an increasing number of trials). Importantly, our work can also be understood as an empirically based call for action, since more work is needed to allow for clear-cut recommendations, and as a starting point to develop and refine comprehensive guidelines in the future. We also echo the cautionary note of Parsons that ‘estimates of reliability refer to the measurement obtained – in a specific sample and under particular circumstances, including the task parameters’ (cf. 
<xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>). Hence, it is important to remember that reliability is a property of a measure that is not fixed and may vary depending on task specifications and samples. In other words, reliability is not a fixed property of the task itself, here fear conditioning.</p><p>We argue that we may need to take a (number of) step(s) back and develop paradigms and data processing pipelines explicitly tailored to individual difference research (i.e., correlation) or experimental (i.e., group level) research questions (e.g., <xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>) and focus more strongly on measurement reliability in experimental work – which has major consequences on effect sizes and statistical power (<xref ref-type="bibr" rid="bib26">Elliott et al., 2020</xref>). More precisely, multiverse-type investigations (<xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>; <xref ref-type="bibr" rid="bib118">Steegen et al., 2016</xref>) that systematically scrutinize the impact of several alternative and equally justifiable processing and analytical decisions in a single dataset (<xref ref-type="bibr" rid="bib61">Kuhn et al., 2022</xref>; <xref ref-type="bibr" rid="bib69">Lonsdorf et al., 2022</xref>; <xref ref-type="bibr" rid="bib112">Sjouwerman et al., 2022</xref>) – as also done here for transformations and number of trials – may be helpful to ultimately achieve this overarching aim. This could be complemented by systematically varying design specifications (<xref ref-type="bibr" rid="bib46">Harder, 2020</xref>) which are extensively heterogeneous in fear conditioning research (<xref ref-type="bibr" rid="bib65">Lonsdorf et al., 2017a</xref>). 
Calibration approaches, as recently suggested (<xref ref-type="bibr" rid="bib6">Bach et al., 2020</xref>), follow a similar aim.</p><p>Such work on measurement questions should be included in cognitive-experimental work as a standard practice (<xref ref-type="bibr" rid="bib90">Parsons, 2020</xref>) and can (often) be explored in a cost and resource effective way in existing data which in the best case are openly available – which, however, requires cross-lab data sharing and data management homogenization plans. Devoting resources and funds to measurement optimization is a valuable investment into the prospect of this field contributing to improved mental health (<xref ref-type="bibr" rid="bib81">Moriarity and Alloy, 2021</xref>) and to resume the path to successful translation from neuroscience discoveries into clinical applications.</p></sec><sec id="s4" sec-type="materials|methods"><title>Materials and methods</title><sec id="s4-1"><title>Pre-registration</title><p>This project has been pre-registered on the Open Science Framework (OSF) (August 03, 2020; retrieved from <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17605/OSF.IO/NH24G">https://doi.org/10.17605/OSF.IO/NH24G</ext-link>). Deviations from the pre-registered protocol are made explicit in brief in the methods section and reasons are specified in <xref ref-type="supplementary-material" rid="supp2">Supplementary file 2</xref> as recommended by <xref ref-type="bibr" rid="bib88">Nosek et al., 2018</xref>, who note that such deviations are common and occur even in the most predictable analysis plans.</p></sec><sec id="s4-2"><title>Participants</title><p>Participants were selected from a large cohort providing participants for subsequent studies as part of the Collaborative Research Center CRC 58. Participants from this sample were recruited for this study through a phone interview. 
Only healthy individuals between 18 and 50 years of age without a history of childhood trauma according to the Childhood Trauma Questionnaire (CTQ, critical cutoffs as identified by <xref ref-type="bibr" rid="bib11">Bernstein et al., 2003</xref>; <xref ref-type="bibr" rid="bib47">Häuser et al., 2011</xref>) were included. Additional exclusion criteria were claustrophobia, cardiac pacemaker, non-MR-compatible metal implants, brain surgery, left handedness, participation in pharmacological studies within the past 2 weeks, medication except for oral contraceptives, internal medical disorders, chronic pain, neurological disorders, psychiatric disorders, metabolic disorders, acute infections, complications with anesthesia in the past and pregnancy. Participants were right handed and had normal or corrected to normal vision. All participants gave written informed consent to the protocol which was approved by the local ethics committee (PV 5157, Ethics Committee of the General Medical Council Hamburg). The study was conducted in accordance with the Declaration of Helsinki. All participants were naive to the experimental setup and received a financial compensation of 170€ for completion of experiments at both time points (T0 and T1).</p><p>The total sample consisted of 120 participants (female<italic><sub>N</sub></italic> = 79, male<italic><sub>N</sub></italic> = 41, age<italic><sub>M</sub></italic> = 24.46, age<sub>SD</sub> = 3.73, age<sub>range</sub> = 18–34). At T0 on days 1 and 2, in total 13 participants were excluded due to technical issues (day 1: <italic>N</italic> = 0; day 2: <italic>N</italic> = 3), deviating protocols (day 1: <italic>N</italic> = 2; day 2: <italic>N</italic> = 0) and SCR non-responding (day 1: <italic>N</italic> = 3; day 2: <italic>N</italic> = 5, see below for definition of ‘non-responding’). 
Accordingly, the final dataset for the cross-sectional analysis of T0 data consists of 107 subjects (female<italic><sub>N</sub></italic> = 70, male<italic><sub>N</sub></italic> = 37, age<italic><sub>M</sub></italic> = 24.30, age<sub>SD</sub> = 3.68, age<sub>range</sub> = 18–34). 84.11% of these participants were aware and 6.54% were unaware of CS–US contingencies. The remaining 9.35% of subjects, who were uncertain of the CS–US contingencies, were classified as semi-aware. CS–US contingency awareness of participants was assessed with a standardized post-experimental awareness interview (adapted from <xref ref-type="bibr" rid="bib9">Bechara et al., 1995</xref>). On average, the US aversiveness was rated on day 1 with a value of 19.82 (SD = 3.28) and on day 2 with a value of 16.46 (SD = 4.75) on a visual analog scale (VAS) ranging from 0 to 25. The US intensity was 8.04 mA (SD = 8.28) on average. Averaged STAI-S (State-Trait Anxiety Inventory – State; <xref ref-type="bibr" rid="bib117">Spielberger, 1983</xref>) scores were 35.38 (SD = 5.26) on day 1 and 35.57 (SD = 6.69) on day 2.</p><p>At T1, 16 subjects were excluded due to technical issues (day 1: <italic>N</italic> = 1; day 2: <italic>N</italic> = 1), deviating protocols (day 1: <italic>N</italic> = 3; day 2: <italic>N</italic> = 0) and SCR non-responding (day 1: <italic>N</italic> = 5; day 2: <italic>N</italic> = 6; see below for definition of ‘non-responding’). Additionally, 20 participants dropped out between T0 and T1 leaving 71 subjects for longitudinal analyses (female<italic><sub>N</sub></italic> = 41, male<italic><sub>N</sub></italic> = 30, age<italic><sub>M</sub></italic> = 24.63, age<sub>SD</sub> = 3.77, age<sub>range</sub> = 18–32). 88.73% of the participants were aware and 1.41% were unaware of CS–US contingencies. The remaining 9.86% were classified as semi-aware. US aversiveness was rated with <italic>M</italic> = 19.96 (SD = 2.99) on day 1 and with <italic>M</italic> = 17.73 (SD = 3.90) on day 2 (VAS = 0–25). 
On average, the US intensity amounted to 9.76 mA (SD = 13.18). Averaged STAI-S scores were 36.33 (SD = 6.09) on day 1 and 35.83 (SD = 7.10) on day 2.</p></sec><sec id="s4-3"><title>Experimental design</title><p>Here, we reanalyzed pre-existing data that are part of a larger longitudinal study that spanned six time points. In the current study, we included data from a 2-day fear conditioning experiment which were collected at two time points (T0 and T1) 6 months apart. The 2-day experimental procedure and the stimuli were identical at both time points. Measures acquired during the full longitudinal study that are not relevant for the current work such as questionnaires, hair, and salivary cortisol are not described in detail here. For an illustration of the experimental design, see also <xref ref-type="fig" rid="fig7">Figure 7</xref>.</p><fig id="fig7" position="float"><label>Figure 7.</label><caption><title>Illustration of the experimental design (<bold>A</bold>) and of the calculations of different measures for skin conductance responses (SCRs) including (averaged) acquisition trials (<bold>B</bold>).</title><p>Note that the habituation phase is not shown in the figure, but described in the text.</p></caption><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/fig7.jpg"/></fig></sec><sec id="s4-4"><title>Experimental protocol and stimuli</title><p>The protocol consisted of a habituation and a fear acquisition training phase on day 1 and an extinction training, reinstatement, and reinstatement-test phase on day 2. Acquisition and extinction training included 28 trials each (14 CS+/14 CS−), habituation and the reinstatement-test phase 14 trials each (7 CS+/7 CS−). Acquisition training was designed as delay conditioning with the US being presented 0.2 s before CS+ offset with 100% reinforcement rate (i.e., all CS+ presentations followed by the US). 
CSs were two light gray fractals (RGB [230, 230, 230]), 492*492 pixels presented in a pseudo-randomized order, with no more than two identical stimuli in a row, for 6–8 s (mean: 7 s). During the intertrial interval (ITI), a white fixation cross was shown for 10–16 s (mean: 13 s). Reinstatement consisted of three trials with a duration of 5 s each presented after a 10 s ITI. Reinstatement USs were delivered 4.8 s after each trial onset. The reinstatement phase was followed by a 13 s ITI before the next CS was presented during reinstatement-test. All stimuli were presented on a gray background (RGB [100, 100, 100]) using <xref ref-type="bibr" rid="bib97">Presentation software, 2010</xref> (Version 14.8, Neurobehavioral Systems, Inc, Albany, CA, USA) keeping the context constant to avoid renewal effects (<xref ref-type="bibr" rid="bib45">Haaker et al., 2014</xref>). Visual stimuli were identical for all participants, but allocation to CS+/CS− and CS type of the first trial of each phase were counterbalanced across participants.</p><p>The electrotactile US consisted of a train of three 2 ms electrotactile rectangular pulses with an interpulse interval of 50 ms generated by a Digitimer DS7A constant current stimulator (Welwyn Garden City, Hertfordshire, UK) and was administered to the back of the right hand of the participants through a 1-cm diameter platinum pin surface electrode. The electrode was attached between the metacarpal bones of the index and middle finger. The US was individually calibrated in a standardized stepwise procedure controlled by the experimenter aiming at an unpleasant, but still tolerable level rated by the participants between 7 and 8 on a scale from zero (=stimulus was not unpleasant at all) to 10 (=stimulus was the worst one could imagine within the study context). 
Participants were, however, not informed that we aimed at a score of 7–8.</p></sec><sec id="s4-5"><title>Outcome measures</title><sec id="s4-5-1"><title>Skin conductance responses</title><p>SCRs were acquired continuously during each phase of conditioning using a BIOPAC MP 100 amplifier (BIOPAC Systems, Inc, Goleta, CA, USA) and Spike 2 software (Cambridge Electronic Design, Cambridge, UK). For analog to digital conversion, a CED2502-SA was used. Two self-adhesive hydrogel Ag/AgCl-sensor recording SCR electrodes (diameter = 55 mm) were attached on the palm of the left hand on the distal and proximal hypothenar. A 10 Hz lowpass filter and a gain of 5Ω were applied. Data were recorded at 1000 Hz and later downsampled to 10 Hz. Subsequently, SCRs were scored semi-manually using the custom-made computer program EDA View (developed by Prof. Dr. Matthias Gamer, University of Würzburg). The program is used to quantify the SCR amplitude based on the trough-to-peak method with the trough occurring at 0.9–3.5 s after CS onset and 0.9–2.5 s after US onset (<xref ref-type="bibr" rid="bib12">Boucsein et al., 2012</xref>; <xref ref-type="bibr" rid="bib111">Sjouwerman and Lonsdorf, 2019</xref>). The maximum rise time was set to maximally 5 s (<xref ref-type="bibr" rid="bib12">Boucsein et al., 2012</xref>) unless the US occurred earlier. SCRs confounded by recording artifacts due to technical reasons, such as electrode detachment or responses moving beyond the sampling window, were discarded and scored as missing values. SCRs smaller than 0.01 μS within the defined time window were defined as zero responses. Participants with zero responses to the US in more than two-thirds (i.e., more than 9 out of 14) of US acquisition trials were classified as non-responders on day 1. 
On day 2, non-responding was defined as no response to any of the three reinstatement USs.</p><p>SCR data were prepared for response quantification by using <xref ref-type="bibr" rid="bib76">MATLAB, 2016</xref> version R2016b. No learning could have possibly taken place during the first CS presentations as the US occurred only after the CS presentation. Consequently, the first CS+ and CS− trials during acquisition training were excluded from analyses. Hence, a total of 26 trials (13 differential SCRs) for the acquisition training phase were included in the analyses. For US analyses, all 14 trials were entered into the analyses.</p><p>Similarly, responses to the first CS+ and CS− during extinction training have to be considered a 24 hrs delayed test of fear recall as no extinction learning could have taken place. Hence, the first trial and the remaining trials of the extinction were analyzed separately. CS discrimination was computed by subtracting (averaged) CS− responses from (averaged) CS+ responses.</p></sec><sec id="s4-5-2"><title>Fear ratings</title><p>Fear ratings to the CSs were collected prior to and after acquisition and extinction training as well as after the reinstatement-test. Participants were asked ‘how much stress, fear and tension’ they experienced when they last saw the CS+ and CS−. After reinstatement-test, ratings referred to (1) the first CS presentation per CS type directly after reinstatement as well as (2) the last CS presentation during reinstatement-test. After acquisition training and the reinstatement-test, subjects were also asked how uncomfortable they experienced the US itself. All ratings were given on a VAS ranging from zero (answer = none) to 100 (answer = maximum). For analyses, the rating scale was reduced to 0–25. Participants had to confirm the ratings via button press. A lack of confirmation resulted in exclusion of the trial from analyses. 
CS discrimination was computed by subtracting CS− from CS+ ratings.</p></sec></sec><sec id="s4-6"><title>BOLD fMRI: data acquisition, preprocessing, and first-level analysis</title><p>The inclusion of BOLD fMRI data was not initially planned and is included here as an additional non-pre-registered outcome measure.</p><sec id="s4-6-1"><title>Data acquisition</title><p>Functional data were acquired with a 3 Tesla PRISMA whole body scanner (Siemens Medical Solutions, Erlangen, Germany) using a 64-channel head coil and an echo planar imaging sequence (repetition time: 1980 ms, echo time: 30 ms, number of slices: 54, slice thickness: 1.7 mm [1 mm gap], field of view = 132 × 132 mm). T1-weighted structural images were acquired using a magnetization prepared rapid gradient echo (MPRAGE) sequence (TR: 2300 ms, TE: 2.98 ms, number of slices: 240, slice thickness: 1 mm, field of view = 192 × 256 mm).</p></sec><sec id="s4-6-2"><title>Preprocessing</title><p>fMRI data analysis was performed using SPM12 (Wellcome Department of Neuroimaging, London, UK) and <xref ref-type="bibr" rid="bib77">MATLAB, 2019</xref>. Preprocessing included realignment, coregistration, normalization to a group-specific DARTEL template and smoothing (6 mm full width at half maximum, FWHM).</p></sec><sec id="s4-6-3"><title>First-level analysis</title><p>Regressors for the first-level analysis of acquisition training data included separate regressors for the first CS+ and CS− trials and the remaining CS+ and CS− trials because no learning could have occurred at the first presentation of the CSs. Nuisance regressors included habituation trials, US presentation, fear ratings and motion parameters. 
Likewise, separate regressors for the first CS+ and CS− trials of extinction (because no extinction has taken place yet) as well as the remaining CS+ and CS− trials were included as regressors of interest in the first-level analysis of extinction data acquired on day 2, while US, rating onset and motion parameters were included as regressors of no interest. No second-level analysis was completed in the current study, instead different analyses were carried out based on first-level models as further detailed in the statistical analysis section.</p></sec><sec id="s4-6-4"><title>Regions of interest</title><p>A total of 11 ROIs (i.e., bilateral anterior insula, amygdala, hippocampus, caudate nucleus, putamen, pallidum, NAcc, thalamus, dACC, dlPFC, and vmPFC) were included in the current study. Amygdala, hippocampus, caudate nucleus, putamen, pallidum, ventral striatum (i.e., NAcc), and thalamus anatomical masks were extracted from the Harvard-Oxford atlas (<xref ref-type="bibr" rid="bib21">Desikan et al., 2006</xref>) at a maximum probability threshold of 0.5. The anterior insula was defined as the overlap between the thresholded anatomical mask from the Harvard-Oxford atlas (threshold: 0.5) and a box of size 60 × 30 × 60 mm centered around MNI<italic>xyz</italic> = 0, 30, 0 based on anatomical subdivisions (<xref ref-type="bibr" rid="bib86">Nieuwenhuys, 2012</xref>). The cortical ROI dlPFC and dACC were created by building a box of size 20 × 16 × 16 mm around peak voxels obtained in a meta-analysis (with the <italic>x</italic> coordinate set to 0 for the dACC) (left dlPFC: MNI<italic>xyz</italic> = −36, 44, 22, right dlPFC: MNI<italic>xyz</italic> = 34, 44, 32, dACC: MNI<italic>xyz</italic> = 0, 18, 42, <xref ref-type="bibr" rid="bib35">Fullana et al., 2016</xref>). 
As previously reported (<xref ref-type="bibr" rid="bib64">Lonsdorf et al., 2014</xref>), the cortical ROI vmPFC was created by using a box of size 20 × 16 × 16 mm centered on peak coordinates identified in prior studies of fear learning (vmPFC: MNI<italic>xyz</italic> = 0, 40, −12, e.g., <xref ref-type="bibr" rid="bib52">Kalisch et al., 2006</xref>, <xref ref-type="bibr" rid="bib78">Milad et al., 2007</xref>) with the <italic>x</italic> coordinate set to 0 to obtain masks symmetric around the midline.</p><p>All analyses of BOLD fMRI as described below were conducted separately not only for the whole brain but also for these 11 selected ROIs.</p></sec></sec><sec id="s4-7"><title>Statistical analyses</title><p>For a comprehensive overview of which analysis was carried out for which outcome measures, stimuli, phases and data transformations, see <xref ref-type="table" rid="table1">Table 1</xref>. For an illustration of which data were included in the different analyses, see also <xref ref-type="fig" rid="fig7">Figure 7B</xref>.</p><sec id="s4-7-1"><title>Internal consistency</title><p>We assessed the internal consistency of SCRs for both time points and experimental phases separately (for details, see <xref ref-type="table" rid="table1">Table 1</xref>): trials of the respective time point and phase were split into odd and even trials (i.e., odd–even approach) and averaged for each individual subject. Averaged odd and even trials were then correlated by using Pearson’s correlation coefficient. To obtain a rather conservative result, we refrained from applying the Spearman–Brown prophecy formula.
We considered the odd–even approach as the most appropriate since our paradigm constitutes a learning experiment and we suggest that adjacent trials measure a more similar construct compared to other possible splits of trials such as a split into halves or a large number of random splits as implemented in the permutation-based approach recommended by <xref ref-type="bibr" rid="bib89">Parsons et al., 2019</xref>. Calculations of internal consistency were not possible for fear ratings and BOLD fMRI due to the limited number of data points for fear ratings and an experimental design that did not allow for a trial-by-trial analysis of BOLD fMRI data. Internal consistency was interpreted using benchmarks for unacceptable (&lt;0.5), poor (>0.5 but &lt;0.6), questionable (>0.6 but &lt;0.7), acceptable (>0.7 but &lt;0.8), good (>0.8 but &lt;0.9), and excellent (≥0.9) (<xref ref-type="bibr" rid="bib55">Kline, 2013</xref>).</p></sec></sec><sec id="s4-8"><title>Longitudinal reliability at the individual and group level</title><p>While internal consistency indicates the extent to which all items of a test or – here, trials of an experimental phase – measure the same construct (<xref ref-type="bibr" rid="bib101">Revelle, 1979</xref>), longitudinal reliability reflects the variability across two or more measurements of the same individual under the same conditions and is therefore indicative of the degree of correlation and agreement between measurements (<xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>). For calculations of longitudinal reliability, we included data from both time points T0 and T1 from the same experimental phase. To capture different aspects of longitudinal reliability, we chose a dual approach of calculating longitudinal reliability at both (1) the individual level and (2) at the group level (for details see also <xref ref-type="table" rid="table1">Table 1</xref>).
To this end, longitudinal reliability at the individual and group level indicates to which extent responses within the same individual and within the group as a whole are stable over time. More precisely, whereas longitudinal reliability at the individual level takes into account the individual responses of participants, which are then related across time points, reliability at the group level first averages the individual responses across the group and then relates them across time points. Reliability at the individual level inherently includes the group level, as it is calculated for the sample as a whole, but the individual responses are central to the calculation. In contrast, for reliability at the group level, the calculation is carried out using group averages.</p><p>Reliability at the individual level was investigated as (1) ICCs encompassing both time points, (2) within- and between-subject similarity of individual trial-by-trial responding (i.e., SCRs) or BOLD fMRI activation patterns between time points, and (3) as the degree of overlap of significant voxels between time points within an individual (for methodological details see below). Reliability at the group level was investigated as (1) trial-by-trial group average SCRs and (2) the degree of overlap of significant voxels between time points within the group as a whole (for methodological details see below).</p><p>Assessments of internal consistency, within- and between-subject similarity, overlap at the individual and group level as well as longitudinal reliability of SCRs at the group level were not pre-registered but are included as they provide valuable additional and complementary information.
Overlap and similarity analyses follow the methodological approach of <xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref>.</p></sec><sec id="s4-9"><title>Longitudinal reliability at the individual level</title><sec id="s4-9-1"><title>Intraclass correlation coefficients</title><p>ICCs were determined separately for each experimental phase by including data from both time points T0 and T1. Generally, larger ICCs indicate higher congruency of within-subject responding between time points and increased distinction of subjects from each other (<xref ref-type="bibr" rid="bib87">Noble et al., 2021</xref>). <xref ref-type="bibr" rid="bib89">Parsons et al., 2019</xref> recommend the calculations of ICCs in cognitive-behavioral tasks through a two-way mixed-effects model of single rater type labeled ICC(2,1) (absolute agreement, in the following referred to as ICC<sub>abs</sub>) and ICC(3,1) (consistency, in the following referred to as ICC<sub>con</sub>) according to <xref ref-type="bibr" rid="bib109">Shrout and Fleiss, 1979</xref> convention and to report their 95% CIs. Due to their slightly different calculations, ICC<sub>abs</sub> tends to be lower than ICC<sub>con</sub> (see <xref ref-type="table" rid="table1">Table 1</xref>).</p><p>However, as the pre-registered mixed-effects approach resulted in non-convergence of some models for SCRs and ratings, we implemented an analysis of variance (ANOVA) instead of the mixed-effects approach to calculate ICC<sub>abs</sub> and ICC<sub>con</sub> (<xref ref-type="bibr" rid="bib109">Shrout and Fleiss, 1979</xref>). To calculate ICCs for BOLD fMRI (additional not pre-registered analyses), the SPM-based toolbox fmreli (<xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref>) was used. 
BOLD fMRI ICCs were determined for each voxel and averaged across the whole brain and for selected ROIs.</p><p>Furthermore, we investigated whether or to what extent ICCs change when ICC calculations were based on different numbers of trials. To this end, we included (additional non-pre-registered) analyses of trial-by-trial ICCs for SCRs in the supplementary material: First, ICCs were only computed for the first trial. Then, all subsequent trials of the respective phase were added stepwise to this first trial. After each step, trials were averaged and ICCs were calculated (see <xref ref-type="fig" rid="fig1s3">Figure 1—figure supplements 3</xref>–<xref ref-type="fig" rid="fig1s8">8</xref>).</p><p>Within the figures, values less than 0.5 are classified as poor reliability, values between 0.5 and 0.75 as indicative of moderate reliability, values between 0.75 and 0.9 are classified as good reliability and values greater than 0.9 as excellent reliability, as suggested by <xref ref-type="bibr" rid="bib57">Koo and Li, 2016</xref>. These benchmarks are included here to provide a frame of reference but we point out that these benchmarks are arbitrary and should hence not be overinterpreted in particular in the context of responding in experimental paradigms as these benchmarks have been developed in different contexts (i.e., trait self-report measures).</p></sec><sec id="s4-9-2"><title>Within- and between-subject similarity</title><p>Both ICCs and within-subject similarity indicate to which extent responses of an individual at one time point are comparable to responses of the same individual at a later time point. Both were calculated separately for each experimental phase by including data from both time points. 
There are, however, two main differences: First, ICCs were calculated by decomposition of variances as applied for ANOVA, whereas similarity was calculated as correlation of responses between both time points (1) within one individual (within-subject similarity) and (2) between this individual and all other individuals (between-subject similarity). Second, while ICCs are interpreted in terms of absolute values using cutoffs that provide information on the quantity of longitudinal reliability, within-subject similarity was compared to between-subject similarity showing if responses of one subject at T0 were more similar to themselves at T1 than to responses of all others at T1. The approach to the assessment of similarity was derived from the idea of representational similarity analysis (RSA) introduced by <xref ref-type="bibr" rid="bib60">Kriegeskorte et al., 2008</xref> and previously used by <xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref> for the comparison of fMRI BOLD activation patterns between different sessions.</p><p>Here, within-subject similarity was calculated by correlating (Pearson’s correlation coefficient) (1) individual trial-by-trial SCRs and (2) the first-level response patterns of brain activation for CS discrimination (i.e., CS+ > CS−) of each individual subject between T0 and T1 resulting in one value of within-subject similarity per subject (e.g., SCR acquisition trials of subject 1 at T0 were correlated with SCR acquisition trials of subject 1 at T1). Between-subject similarity was calculated by correlating trial-by-trial SCRs or the first-level response patterns of brain activation of each individual subject at T0 with those of all other individuals at T1 (e.g., SCR acquisition trials of subject 1 at T0 were correlated with SCR acquisition trials of subject 2–71 at T1). This resulted in 70 correlation coefficients for each subject. 
These correlation coefficients were then averaged to yield one correlation coefficient per subject as an indicator of between-subject similarity.</p><p>For comparisons of within- and between-subject similarity in SCR and BOLD fMRI, similarities were Fisher <italic>r</italic>-to-<italic>z</italic> transformed and compared using paired <italic>t</italic>-tests or Welch tests in cases where the assumption of equal variances was not met. Cohen’s <italic>d</italic> is reported as effect size.</p><p>Note that within-subject similarities of SCRs could not be calculated for participants with a single non-zero response at the same trial (e.g., trial 1) at both time points or only zero responses to the CS+ or CS− in one particular phase. This is because arrays that include only zeros cannot be correlated and correlations of 1 (e.g., resulting from non-zero responses at the same trial at both time points) result in infinite Fisher <italic>r</italic>-to-<italic>z</italic> transformed correlations. Thus, different numbers of participants had to be included in the analyses of SCRs during acquisition (<italic>N</italic><sub>CS discrimination</sub> = 65, <italic>N</italic><sub>CS</sub><italic><sub>+</sub></italic> = 62, <italic>N</italic><sub>CS−</sub> = 56, <italic>N</italic><sub>US</sub> = 71) and extinction training (<italic>N</italic><sub>CS discrimination</sub> = 45, <italic>N</italic><sub>CS</sub><italic><sub>+</sub></italic> = 40, <italic>N</italic><sub>CS</sub><italic><sub>−</sub></italic> = 32).</p></sec><sec id="s4-9-3"><title>Overlap at the individual level</title><p>For BOLD fMRI, overlap in individual subject activation patterns across both time points was calculated as a third indicator of reliability at the individual level. Thus, overlap was determined separately for experimental phases by including data from both time points T0 and T1.
To this end, activation maps from first-level contrasts (here CS+ > CS− or CS discrimination) were compared such that the degree of overlap of significant voxels at a liberal threshold of p<sub>uncorrected</sub> &lt; 0.01 between T0 and T1 was determined and expressed as the Dice and Jaccard coefficients (<xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref>). Both coefficients range from 0 (no overlap) to 1 (perfect overlap), with the Jaccard index being easily interpretable as percent overlap (<xref ref-type="bibr" rid="bib34">Fröhner et al., 2019</xref>). While overlap reflects the degree of voxels activated at both time points, similarity measures (see above) are based on the correlation of activated voxels between time points and can be considered a continuous approach based on CS+ > CS− contrast specific beta values and not thresholded T-maps.</p></sec></sec><sec id="s4-10"><title>Longitudinal reliability at the group level</title><p>As opposed to longitudinal reliability at the individual level which indicates the stability of individual responses across time points, longitudinal reliability at the group level refers to how stable group average responding is over time. Longitudinal reliability at the group level was calculated separately for experimental phases by including data from both time points T0 and T1.</p><p>We define longitudinal reliability at the group level (1) for SCRs as the percentage of explained variance of group averaged trials at T1 by group averaged trials at T0 (i.e., <italic>R</italic> squared) and (2) for BOLD fMRI as the degree of overlap of group averaged activated voxels between both time points.
Different analysis approaches were chosen as SCR and BOLD fMRI data are inherently different measures: trial-by-trial analyses in fMRI require slow-event related designs with long ITIs as well as fixed trial orders and ideally partial reinforcement rate to not confound CS and US responses (<xref ref-type="bibr" rid="bib127">Visser et al., 2016</xref>). Hence, trial-by-trial analyses were not possible given our design and thus overlap at a group level was defined as overlap at voxel rather than at trial level.</p><p>For SCRs, simple linear regressions were computed with group averaged SCR trials at T0 as independent and group averaged SCR trials at T1 as dependent variable and R squared was extracted. This was done separately for experimental phases. Although the Pearson’s correlation coefficient is often calculated to determine longitudinal reliability, <italic>R</italic> squared, which like overlap can also be expressed as a percentage, appears closest to the concept of overlap of significant voxels at T0 and T1 as applied to BOLD fMRI data.</p><p>For overlap in BOLD fMRI at the group level, the degree of overlap of significant voxels between both time points was determined for aggregated group-level activations instead of single subject-level activation patterns (see ‘Overlap at the individual level’) and expressed using the Dice and Jaccard indices as described above.</p></sec><sec id="s4-11"><title>Cross-phases predictability of conditioned responding</title><p>Simple linear regressions were calculated to assess the predictability of SCRs and fear ratings across experimental phases at T0. During data analysis, inspection of the data revealed heteroscedasticity. Therefore and deviating from the pre-registration, regressions with robust standard errors were calculated by using the HC3 estimator (<xref ref-type="bibr" rid="bib48">Hayes and Cai, 2007</xref>). 
Two consecutive phases represent the independent and the dependent variable, respectively, with the preceding phase as the independent variable and the following phase as the dependent variable. For SCR and fear ratings, standardized betas as derived from linear regressions are reported. In simple linear regression, as implemented here, standardized betas can also be interpreted as Pearson’s correlation coefficients.</p><p>For fMRI data, we adopted the cross-phases predictability analysis of SCR and fear ratings by calculating Pearson’s correlation coefficients between patterns of voxel activation (i.e., first-level beta maps). Correlations were first calculated at the individual subject level and subsequently averaged.</p><p>Standardized betas (resulting from SCR and fear rating regressions) and correlation coefficients (resulting from BOLD fMRI correlational tests) were interpreted as demonstrating weak, moderate, or strong associations between variables with values of &lt;0.4, ≥0.4, and ≥0.7, respectively (<xref ref-type="bibr" rid="bib20">Dancey and Reidy, 2007</xref>). Tables containing regression parameters beyond the standardized betas depicted in <xref ref-type="fig" rid="fig5">Figure 5A, B</xref> are presented in the Supplement (see <xref ref-type="supplementary-material" rid="supp7">Supplementary file 7</xref>, <xref ref-type="supplementary-material" rid="supp8">Supplementary file 8</xref>).</p><p>For SCR and fear rating predictions, we assessed if predictions differ in their strength or direction when they are summarized across certain data specifications (see <xref ref-type="table" rid="table1">Table 1</xref>). For BOLD fMRI, correlation coefficients were pooled across ROIs. <italic>T</italic>-tests or Welch tests in cases where the assumption of equal variances was not met were performed on individual Fisher <italic>r</italic>-to-<italic>z</italic> transformed standardized betas (SCR and fear ratings) or correlation coefficients (BOLD fMRI).
We highlight that these analyses can be interpreted as an example of predictive validity (i.e., the extent to which a score on a test predicts a score on a criterion measure). As our aim here is, however, not validation, we use the term cross-phase prediction throughout. (More precisely, we believe that ‘cross-phase predictions’ in our study cannot be used interchangeably with ‘criterion or predictive validity’ since our aim was not to validate one experimental phase against the other. Predictive validity in psychometrics is defined as ‘the extent to which a score on a scale (or test) predicts scores on some criterion measure’ (cf. <xref ref-type="bibr" rid="bib19">Cronbach and Meehl, 1955</xref>). For instance, a cognitive test for job performance would have predictive validity if the observed correlation between the test score and the performance rating by the company were statistically significant. Rather, we investigated whether responses in earlier experimental phases could predict responses in later experimental phases – both of which cannot be expected to ‘measure the same thing’.)</p><p>For all statistical analyses described above, a level of p &lt; 0.05 (two-sided) was considered significant. Since we were more interested in patterns of results and less in the result of one specific test, it was not necessary to correct for multiple comparisons.
Moreover, multiverse approaches, as approximated in our study, are assumed to be insensitive to multiple comparisons (<xref ref-type="bibr" rid="bib69">Lonsdorf et al., 2022</xref>).</p><p>For data analyses and visualizations as well as for the creation of the manuscript, we used R (Version 4.1.3; <xref ref-type="bibr" rid="bib100">R Development Core Team, 2020</xref>) and the R-packages <italic>apa</italic> (<xref ref-type="bibr" rid="bib5">Aust and Barth, 2020</xref>; Version 0.3.3; <xref ref-type="bibr" rid="bib43">Gromer, 2020</xref>), <italic>car</italic> (Version 3.0.10; <xref ref-type="bibr" rid="bib31">Fox and Weisberg, 2019</xref>; <xref ref-type="bibr" rid="bib32">Fox et al., 2020</xref>), <italic>carData</italic> (Version 3.0.4; <xref ref-type="bibr" rid="bib32">Fox et al., 2020</xref>), <italic>cowplot</italic> (Version 1.1.1; <xref ref-type="bibr" rid="bib135">Wilke, 2020</xref>), <italic>DescTools</italic> (Version 0.99.42; <xref ref-type="bibr" rid="bib3">Andri mult, 2021</xref>), <italic>dplyr</italic> (Version 1.0.8; <xref ref-type="bibr" rid="bib134">Wickham et al., 2021</xref>), <italic>effsize</italic> (<xref ref-type="bibr" rid="bib121">Torchiano, 2020</xref>), <italic>flextable</italic> (Version 0.6.10; <xref ref-type="bibr" rid="bib40">Gohel, 2021</xref>), <italic>gghalves</italic> (Version 0.1.1; <xref ref-type="bibr" rid="bib120">Tiedemann, 2020</xref>), <italic>ggplot2</italic> (Version 3.3.5; <xref ref-type="bibr" rid="bib131">Wickham, 2016</xref>), <italic>ggpubr</italic> (Version 0.4.0; <xref ref-type="bibr" rid="bib53">Kassambara, 2020</xref>), <italic>ggsignif</italic> (Version 0.6.3; <xref ref-type="bibr" rid="bib15">Constantin and Patil, 2021</xref>), <italic>gridExtra</italic> (Version 2.3; <xref ref-type="bibr" rid="bib4">Auguie, 2017</xref>), <italic>here</italic> (Version 1.0.1; <xref ref-type="bibr" rid="bib82">Müller, 2020</xref>), <italic>kableExtra</italic> (Version 1.3.1; <xref ref-type="bibr" rid="bib145">Zhu, 
2020</xref>), <italic>knitr</italic> (Version 1.37; <xref ref-type="bibr" rid="bib136">Xie, 2015</xref>), <italic>lm.beta</italic> (Version 1.5.1; <xref ref-type="bibr" rid="bib10">Behrendt, 2014</xref>), <italic>lmtest</italic> (Version 0.9.38; <xref ref-type="bibr" rid="bib140">Zeileis and Hothorn, 2002</xref>), <italic>officedown</italic> (Version 0.2.4; <xref ref-type="bibr" rid="bib41">Gohel and Ross, 2022</xref>), <italic>papaja</italic> (Version 0.1.0.9997; <xref ref-type="bibr" rid="bib5">Aust and Barth, 2020</xref>), <italic>patchwork</italic> (Version 1.1.0; <xref ref-type="bibr" rid="bib91">Pedersen, 2020</xref>), <italic>psych</italic> (Version 2.0.9; <xref ref-type="bibr" rid="bib102">Revelle, 2020</xref>), <italic>renv</italic> (Version 0.13.2; <xref ref-type="bibr" rid="bib123">Ushey, 2020</xref>), <italic>reshape2</italic> (Version 1.4.4; <xref ref-type="bibr" rid="bib130">Wickham, 2007</xref>), <italic>sandwich</italic> (<xref ref-type="bibr" rid="bib141">Zeileis, 2004</xref>; <xref ref-type="bibr" rid="bib143">Zeileis, 2006</xref>; Version 3.0.1; <xref ref-type="bibr" rid="bib144">Zeileis et al., 2020</xref>), <italic>stringr</italic> (Version 1.4.0; <xref ref-type="bibr" rid="bib132">Wickham, 2019</xref>), <italic>tidyr</italic> (Version 1.2.0; <xref ref-type="bibr" rid="bib133">Wickham, 2020</xref>), <italic>tinylabels</italic> (Version 0.2.3; <xref ref-type="bibr" rid="bib8">Barth, 2022</xref>), and <italic>zoo</italic> (Version 1.8.8; <xref ref-type="bibr" rid="bib142">Zeileis and Grothendieck, 2005</xref>).</p></sec></sec></body><back><sec sec-type="additional-information" id="s5"><title>Additional information</title><fn-group content-type="competing-interest"><title>Competing interests</title><fn fn-type="COI-statement" id="conf1"><p>No competing interests declared</p></fn><fn fn-type="COI-statement" id="conf2"><p>No competing interests declared</p></fn></fn-group><fn-group content-type="author-contribution"><title>Author 
contributions</title><fn fn-type="con" id="con1"><p>Conceptualization, Data curation, Software, Formal analysis, Visualization, Methodology, Writing - original draft, Pre-registration of the study</p></fn><fn fn-type="con" id="con2"><p>Conceptualization, Formal analysis, Visualization, Methodology, Writing - original draft</p></fn><fn fn-type="con" id="con3"><p>Data curation, Software, Investigation, Writing – review and editing</p></fn><fn fn-type="con" id="con4"><p>Formal analysis, Visualization, Methodology, Writing – review and editing, Pre-registration of the study</p></fn><fn fn-type="con" id="con5"><p>Conceptualization, Resources, Supervision, Funding acquisition, Methodology, Writing - original draft, Pre-registration of the study</p></fn></fn-group><fn-group content-type="ethics-information"><title>Ethics</title><fn fn-type="other"><p>All participants gave written informed consent to the protocol which was approved by the local ethics committee (PV 5157, Ethics Committee of the General Medical Council Hamburg). 
The study was conducted in accordance with the Declaration of Helsinki.</p></fn></fn-group></sec><sec sec-type="supplementary-material" id="s6"><title>Additional files</title><supplementary-material id="supp1"><label>Supplementary file 1.</label><caption><title>Overview of experimental specifications and results of five previous studies reporting test–retest reliabilities in human fear conditioning research.</title></caption><media xlink:href="elife-78717-supp1-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="supp2"><label>Supplementary file 2.</label><caption><title>Deviations from pre-registration.</title></caption><media xlink:href="elife-78717-supp2-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="supp3"><label>Supplementary file 3.</label><caption><title>ICC<sub>abs</sub> and ICC<sub>con</sub> for all data specifications of SCRs.</title></caption><media xlink:href="elife-78717-supp3-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="supp4"><label>Supplementary file 4.</label><caption><title>ICC<sub>abs</sub> and ICC<sub>con</sub> for all data specifications of fear ratings.</title></caption><media xlink:href="elife-78717-supp4-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="supp5"><label>Supplementary file 5.</label><caption><title>ICC<sub>abs</sub> and ICC<sub>con</sub> for CS discrimination during fear acquisition (Acq) and extinction training (Ext).</title></caption><media xlink:href="elife-78717-supp5-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="supp6"><label>Supplementary file 6.</label><caption><title>Paired sample <italic>t</italic>-tests comparing between- and within-subject similarity for whole brain activation pattern as well as activation pattern in the ROIs for 
acquisition training (Acq) and extinction training (Ext).</title></caption><media xlink:href="elife-78717-supp6-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="supp7"><label>Supplementary file 7.</label><caption><title>Detailed results of linear regressions: SCR.</title></caption><media xlink:href="elife-78717-supp7-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="supp8"><label>Supplementary file 8.</label><caption><title>Detailed results of linear regressions: fear ratings.</title></caption><media xlink:href="elife-78717-supp8-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="transrepform"><label>Transparent reporting form</label><media xlink:href="elife-78717-transrepform1-v3.docx" mimetype="application" mime-subtype="docx"/></supplementary-material><supplementary-material id="mdar"><label>MDAR checklist</label><media xlink:href="elife-78717-mdarchecklist1-v3.pdf" mimetype="application" mime-subtype="pdf"/></supplementary-material></sec><sec sec-type="data-availability" id="s7"><title>Data availability</title><p>The data that support the findings of this study and the R Markdown files that generate this manuscript are openly available in Zenodo at <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.7323547">https://doi.org/10.5281/zenodo.7323547</ext-link>.</p><p>The following dataset was generated:</p><p><element-citation publication-type="data" specific-use="isSupplementedBy" id="dataset1"><person-group 
person-group-type="author"><name><surname>Klingelhöfer-Jens</surname><given-names>M</given-names></name><name><surname>Ehlers</surname><given-names>MR</given-names></name><name><surname>Kuhn</surname><given-names>M</given-names></name><name><surname>Keyaniyan</surname><given-names>V</given-names></name><name><surname>Lonsdorf</surname><given-names>TB</given-names></name></person-group><year iso-8601-date="2022">2022</year><data-title>Robust group- but limited individual-level (longitudinal) reliability and insights into cross-phases response prediction of conditioned fear</data-title><source>Zenodo</source><pub-id pub-id-type="doi">10.5281/zenodo.7323547</pub-id></element-citation></p></sec><ack id="ack"><title>Acknowledgements</title><p>The authors would like to thank Claudia Immisch, Janne Nold, Kevin Rozario, and Habiba Schiller for help with data collection and Karoline Rosenkranz for help with data preprocessing, Mario Reutter for methodological discussions and comments on an earlier draft as well as Juliane Tkotz for support with reproducible manuscript writing.</p></ack><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Aldridge</surname><given-names>VK</given-names></name><name><surname>Dovey</surname><given-names>TM</given-names></name><name><surname>Wade</surname><given-names>A</given-names></name></person-group><year iso-8601-date="2017">2017</year><article-title>Assessing test-retest reliability of psychological measures</article-title><source>European Psychologist</source><volume>22</volume><fpage>207</fpage><lpage>218</lpage><pub-id pub-id-type="doi">10.1027/1016-9040/a000298</pub-id></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Anderson</surname><given-names>KC</given-names></name><name><surname>Insel</surname><given-names>TR</given-names></name></person-group><year iso-8601-date="2006">2006</year><article-title>The promise of extinction research for the prevention and treatment of anxiety disorders</article-title><source>Biological Psychiatry</source><volume>60</volume><fpage>319</fpage><lpage>321</lpage><pub-id pub-id-type="doi">10.1016/j.biopsych.2006.06.022</pub-id><pub-id pub-id-type="pmid">16919521</pub-id></element-citation></ref><ref id="bib3"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Signorell</surname><given-names>A</given-names></name></person-group><year iso-8601-date="2021">2021</year><data-title>DescTools: tools for descriptive statistics</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/package=DescTools">https://cran.r-project.org/package=DescTools</ext-link></element-citation></ref><ref id="bib4"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Auguie</surname><given-names>B</given-names></name></person-group><year iso-8601-date="2017">2017</year><data-title>GridExtra: miscellaneous functions for “grid” graphics</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=gridExtra">https://CRAN.R-project.org/package=gridExtra</ext-link></element-citation></ref><ref id="bib5"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Aust</surname><given-names>F</given-names></name><name><surname>Barth</surname><given-names>M</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Papaja: create APA manuscripts with R markdown</data-title><version designator="4.0.3">4.0.3</version><source>Github</source><ext-link ext-link-type="uri" 
xlink:href="https://github.com/crsh/papaja">https://github.com/crsh/papaja</ext-link></element-citation></ref><ref id="bib6"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bach</surname><given-names>DR</given-names></name><name><surname>Melinščak</surname><given-names>F</given-names></name><name><surname>Fleming</surname><given-names>SM</given-names></name><name><surname>Voelkle</surname><given-names>MC</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Calibrating the experimental measurement of psychological attributes</article-title><source>Nature Human Behaviour</source><volume>4</volume><fpage>1229</fpage><lpage>1235</lpage><pub-id pub-id-type="doi">10.1038/s41562-020-00976-8</pub-id><pub-id pub-id-type="pmid">33199857</pub-id></element-citation></ref><ref id="bib7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baker</surname><given-names>DH</given-names></name><name><surname>Vilidaite</surname><given-names>G</given-names></name><name><surname>Lygo</surname><given-names>FA</given-names></name><name><surname>Smith</surname><given-names>AK</given-names></name><name><surname>Flack</surname><given-names>TR</given-names></name><name><surname>Gouws</surname><given-names>AD</given-names></name><name><surname>Andrews</surname><given-names>TJ</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Power contours: optimising sample size and precision in experimental psychology and human neuroscience</article-title><source>Psychological Methods</source><volume>26</volume><fpage>295</fpage><lpage>314</lpage><pub-id pub-id-type="doi">10.1037/met0000337</pub-id><pub-id pub-id-type="pmid">32673043</pub-id></element-citation></ref><ref id="bib8"><element-citation publication-type="software"><person-group 
person-group-type="author"><name><surname>Barth</surname><given-names>M</given-names></name></person-group><year iso-8601-date="2022">2022</year><data-title>Tinylabels: lightweight variable labels</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/package=tinylabels">https://cran.r-project.org/package=tinylabels</ext-link></element-citation></ref><ref id="bib9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bechara</surname><given-names>A</given-names></name><name><surname>Tranel</surname><given-names>D</given-names></name><name><surname>Damasio</surname><given-names>H</given-names></name><name><surname>Adolphs</surname><given-names>R</given-names></name><name><surname>Rockland</surname><given-names>C</given-names></name><name><surname>Damasio</surname><given-names>AR</given-names></name></person-group><year iso-8601-date="1995">1995</year><article-title>Double dissociation of conditioning and declarative knowledge relative to the amygdala and hippocampus in humans</article-title><source>Science</source><volume>269</volume><fpage>1115</fpage><lpage>1118</lpage><pub-id pub-id-type="doi">10.1126/science.7652558</pub-id><pub-id pub-id-type="pmid">7652558</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Behrendt</surname><given-names>S</given-names></name></person-group><year iso-8601-date="2014">2014</year><data-title>Lm.beta: add standardized regression coefficients to lm-objects</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=lm.beta">https://CRAN.R-project.org/package=lm.beta</ext-link></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Bernstein</surname><given-names>DP</given-names></name><name><surname>Stein</surname><given-names>JA</given-names></name><name><surname>Newcomb</surname><given-names>MD</given-names></name><name><surname>Walker</surname><given-names>E</given-names></name><name><surname>Pogge</surname><given-names>D</given-names></name><name><surname>Ahluvalia</surname><given-names>T</given-names></name><name><surname>Stokes</surname><given-names>J</given-names></name><name><surname>Handelsman</surname><given-names>L</given-names></name><name><surname>Medrano</surname><given-names>M</given-names></name><name><surname>Desmond</surname><given-names>D</given-names></name><name><surname>Zule</surname><given-names>W</given-names></name></person-group><year iso-8601-date="2003">2003</year><article-title>Development and validation of a brief screening version of the childhood trauma questionnaire</article-title><source>Child Abuse &amp; Neglect</source><volume>27</volume><fpage>169</fpage><lpage>190</lpage><pub-id pub-id-type="doi">10.1016/s0145-2134(02)00541-0</pub-id><pub-id pub-id-type="pmid">12615092</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Boucsein</surname><given-names>W</given-names></name><name><surname>Fowles</surname><given-names>DC</given-names></name><name><surname>Grimnes</surname><given-names>S</given-names></name><name><surname>Ben-Shakhar</surname><given-names>G</given-names></name><name><surname>Roth</surname><given-names>WT</given-names></name><name><surname>Dawson</surname><given-names>ME</given-names></name><name><surname>Filion</surname><given-names>DL</given-names></name><collab>Society for Psychophysiological Research Ad Hoc Committee on Electrodermal Measures</collab></person-group><year iso-8601-date="2012">2012</year><article-title>Publication recommendations for electrodermal 
measurements</article-title><source>Psychophysiology</source><volume>49</volume><fpage>1017</fpage><lpage>1034</lpage><pub-id pub-id-type="doi">10.1111/j.1469-8986.2012.01384.x</pub-id><pub-id pub-id-type="pmid">22680988</pub-id></element-citation></ref><ref id="bib13"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bouton</surname><given-names>ME</given-names></name></person-group><year iso-8601-date="2004">2004</year><article-title>Context and behavioral processes in extinction</article-title><source>Learning &amp; Memory</source><volume>11</volume><fpage>485</fpage><lpage>494</lpage><pub-id pub-id-type="doi">10.1101/lm.78804</pub-id><pub-id pub-id-type="pmid">15466298</pub-id></element-citation></ref><ref id="bib14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bouton</surname><given-names>ME</given-names></name><name><surname>García-Gutiérrez</surname><given-names>A</given-names></name><name><surname>Zilski</surname><given-names>J</given-names></name><name><surname>Moody</surname><given-names>EW</given-names></name></person-group><year iso-8601-date="2006">2006</year><article-title>Extinction in multiple contexts does not necessarily make extinction less vulnerable to relapse</article-title><source>Behaviour Research and Therapy</source><volume>44</volume><fpage>983</fpage><lpage>994</lpage><pub-id pub-id-type="doi">10.1016/j.brat.2005.07.007</pub-id><pub-id pub-id-type="pmid">16198302</pub-id></element-citation></ref><ref id="bib15"><element-citation publication-type="preprint"><person-group person-group-type="author"><name><surname>Constantin</surname><given-names>AE</given-names></name><name><surname>Patil</surname><given-names>I</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Ggsignif: R Package for Displaying Significance Brackets for “Ggplot2.”</article-title><source>PsyArXiv</source><pub-id 
pub-id-type="doi">10.31234/osf.io/7awm6</pub-id></element-citation></ref><ref id="bib16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cooper</surname><given-names>SE</given-names></name><name><surname>van Dis</surname><given-names>EAM</given-names></name><name><surname>Hagenaars</surname><given-names>MA</given-names></name><name><surname>Krypotos</surname><given-names>AM</given-names></name><name><surname>Nemeroff</surname><given-names>CB</given-names></name><name><surname>Lissek</surname><given-names>S</given-names></name><name><surname>Engelhard</surname><given-names>IM</given-names></name><name><surname>Dunsmoor</surname><given-names>JE</given-names></name></person-group><year iso-8601-date="2022">2022a</year><article-title>A meta-analysis of conditioned fear generalization in anxiety-related disorders</article-title><source>Neuropsychopharmacology: Official Publication of the American College of Neuropsychopharmacology</source><volume>47</volume><fpage>1652</fpage><lpage>1661</lpage><pub-id pub-id-type="doi">10.1038/s41386-022-01332-2</pub-id><pub-id pub-id-type="pmid">35501429</pub-id></element-citation></ref><ref id="bib17"><element-citation publication-type="preprint"><person-group person-group-type="author"><name><surname>Cooper</surname><given-names>SE</given-names></name><name><surname>Dunsmoor</surname><given-names>JE</given-names></name><name><surname>Koval</surname><given-names>K</given-names></name><name><surname>Pino</surname><given-names>E</given-names></name><name><surname>Steinman</surname><given-names>S</given-names></name></person-group><year iso-8601-date="2022">2022b</year><article-title>Test-Retest Reliability of Human Threat Conditioning and Generalization</article-title><source>PsyArXiv</source><pub-id pub-id-type="doi">10.31234/osf.io/84uqz</pub-id></element-citation></ref><ref id="bib18"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Craske</surname><given-names>MG</given-names></name><name><surname>Kircanski</surname><given-names>K</given-names></name><name><surname>Zelikowsky</surname><given-names>M</given-names></name><name><surname>Mystkowski</surname><given-names>J</given-names></name><name><surname>Chowdhury</surname><given-names>N</given-names></name><name><surname>Baker</surname><given-names>A</given-names></name></person-group><year iso-8601-date="2008">2008</year><article-title>Optimizing inhibitory learning during exposure therapy</article-title><source>Behaviour Research and Therapy</source><volume>46</volume><fpage>5</fpage><lpage>27</lpage><pub-id pub-id-type="doi">10.1016/j.brat.2007.10.003</pub-id><pub-id pub-id-type="pmid">18005936</pub-id></element-citation></ref><ref id="bib19"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cronbach</surname><given-names>LJ</given-names></name><name><surname>Meehl</surname><given-names>PE</given-names></name></person-group><year iso-8601-date="1955">1955</year><article-title>Construct validity in psychological tests</article-title><source>Psychological Bulletin</source><volume>52</volume><fpage>281</fpage><lpage>302</lpage><pub-id pub-id-type="doi">10.1037/h0040957</pub-id><pub-id pub-id-type="pmid">13245896</pub-id></element-citation></ref><ref id="bib20"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Dancey</surname><given-names>CP</given-names></name><name><surname>Reidy</surname><given-names>J</given-names></name></person-group><year iso-8601-date="2007">2007</year><source>Statistics without Maths for Psychology: Using SPSS for Windows</source><publisher-loc>Harlow, England ; New York</publisher-loc><publisher-name>Pearson/Prentice Hall</publisher-name></element-citation></ref><ref id="bib21"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Desikan</surname><given-names>RS</given-names></name><name><surname>Ségonne</surname><given-names>F</given-names></name><name><surname>Fischl</surname><given-names>B</given-names></name><name><surname>Quinn</surname><given-names>BT</given-names></name><name><surname>Dickerson</surname><given-names>BC</given-names></name><name><surname>Blacker</surname><given-names>D</given-names></name><name><surname>Buckner</surname><given-names>RL</given-names></name><name><surname>Dale</surname><given-names>AM</given-names></name><name><surname>Maguire</surname><given-names>RP</given-names></name><name><surname>Hyman</surname><given-names>BT</given-names></name><name><surname>Albert</surname><given-names>MS</given-names></name><name><surname>Killiany</surname><given-names>RJ</given-names></name></person-group><year iso-8601-date="2006">2006</year><article-title>An automated labeling system for subdividing the human cerebral cortex on MRI scans into gyral based regions of interest</article-title><source>NeuroImage</source><volume>31</volume><fpage>968</fpage><lpage>980</lpage><pub-id pub-id-type="doi">10.1016/j.neuroimage.2006.01.021</pub-id><pub-id pub-id-type="pmid">16530430</pub-id></element-citation></ref><ref id="bib22"><element-citation publication-type="preprint"><person-group 
person-group-type="author"><name><surname>DeYoung</surname><given-names>CG</given-names></name><name><surname>Sassenberg</surname><given-names>T</given-names></name><name><surname>Abend</surname><given-names>R</given-names></name><name><surname>Allen</surname><given-names>T</given-names></name><name><surname>Beaty</surname><given-names>R</given-names></name><name><surname>Bellgrove</surname><given-names>M</given-names></name><name><surname>Blain</surname><given-names>SD</given-names></name><name><surname>Bzdok</surname><given-names>D</given-names></name><name><surname>Chavez</surname><given-names>R</given-names></name><name><surname>Engel</surname><given-names>SA</given-names></name><name><surname>Ma</surname><given-names>F</given-names></name><name><surname>Fornito</surname><given-names>A</given-names></name><name><surname>Genç</surname><given-names>E</given-names></name><name><surname>Goghari</surname><given-names>V</given-names></name><name><surname>Grazioplene</surname><given-names>R</given-names></name><name><surname>Hanson</surname><given-names>JL</given-names></name><name><surname>Haxby</surname><given-names>JV</given-names></name><name><surname>Hilger</surname><given-names>K</given-names></name><name><surname>Homan</surname><given-names>P</given-names></name><name><surname>Joyner</surname><given-names>K</given-names></name><name><surname>Kaczkurkin</surname><given-names>AN</given-names></name><name><surname>Latzman</surname><given-names>RD</given-names></name><name><surname>Martin</surname><given-names>EA</given-names></name><name><surname>Passamonti</surname><given-names>L</given-names></name><name><surname>Pickering</surname><given-names>A</given-names></name><name><surname>Safron</surname><given-names>A</given-names></name><name><surname>Servaas</surname><given-names>M</given-names></name><name><surname>Smillie</surname><given-names>LD</given-names></name><name><surname>Spreng</surname><given-names>RN</given-names></name><name><surname>Tiego</surname><given-names>J</given-names></name><name><surname>Viding</surname><given-names>E</given-names></name><name><surname>Wacker</surname><given-names>J</given-names></name></person-group><year 
iso-8601-date="2022">2022</year><article-title>Reproducible Between-Person Brain-Behavior Associations Do Not Always Require Thousands of Individuals</article-title><source>PsyArXiv</source><pub-id pub-id-type="doi">10.31234/osf.io/sfnmk</pub-id></element-citation></ref><ref id="bib23"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Duits</surname><given-names>P</given-names></name><name><surname>Cath</surname><given-names>DC</given-names></name><name><surname>Lissek</surname><given-names>S</given-names></name><name><surname>Hox</surname><given-names>JJ</given-names></name><name><surname>Hamm</surname><given-names>AO</given-names></name><name><surname>Engelhard</surname><given-names>IM</given-names></name><name><surname>van den Hout</surname><given-names>MA</given-names></name><name><surname>Baas</surname><given-names>JMP</given-names></name></person-group><year iso-8601-date="2015">2015</year><article-title>Updated meta-analysis of classical fear conditioning in the anxiety disorders</article-title><source>Depression and Anxiety</source><volume>32</volume><fpage>239</fpage><lpage>253</lpage><pub-id pub-id-type="doi">10.1002/da.22353</pub-id><pub-id pub-id-type="pmid">25703487</pub-id></element-citation></ref><ref id="bib24"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dunsmoor</surname><given-names>JE</given-names></name><name><surname>Cisler</surname><given-names>JM</given-names></name><name><surname>Fonzo</surname><given-names>GA</given-names></name><name><surname>Creech</surname><given-names>SK</given-names></name><name><surname>Nemeroff</surname><given-names>CB</given-names></name></person-group><year iso-8601-date="2022">2022</year><article-title>Laboratory models of 
post-traumatic stress disorder: the elusive bridge to translation</article-title><source>Neuron</source><volume>110</volume><fpage>1754</fpage><lpage>1776</lpage><pub-id pub-id-type="doi">10.1016/j.neuron.2022.03.001</pub-id><pub-id pub-id-type="pmid">35325617</pub-id></element-citation></ref><ref id="bib25"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ehlers</surname><given-names>MR</given-names></name><name><surname>Nold</surname><given-names>J</given-names></name><name><surname>Kuhn</surname><given-names>M</given-names></name><name><surname>Klingelhöfer-Jens</surname><given-names>M</given-names></name><name><surname>Lonsdorf</surname><given-names>TB</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Revisiting potential associations between brain morphology, fear acquisition and extinction through new data and a literature review</article-title><source>Scientific Reports</source><volume>10</volume><elocation-id>19894</elocation-id><pub-id pub-id-type="doi">10.1038/s41598-020-76683-1</pub-id><pub-id pub-id-type="pmid">33199738</pub-id></element-citation></ref><ref id="bib26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elliott</surname><given-names>ML</given-names></name><name><surname>Knodt</surname><given-names>AR</given-names></name><name><surname>Ireland</surname><given-names>D</given-names></name><name><surname>Morris</surname><given-names>ML</given-names></name><name><surname>Poulton</surname><given-names>R</given-names></name><name><surname>Ramrakha</surname><given-names>S</given-names></name><name><surname>Sison</surname><given-names>ML</given-names></name><name><surname>Moffitt</surname><given-names>TE</given-names></name><name><surname>Caspi</surname><given-names>A</given-names></name><name><surname>Hariri</surname><given-names>AR</given-names></name></person-group><year 
iso-8601-date="2020">2020</year><article-title>What is the test-retest reliability of common task-functional MRI measures? New empirical evidence and a meta-analysis</article-title><source>Psychological Science</source><volume>31</volume><fpage>792</fpage><lpage>806</lpage><pub-id pub-id-type="doi">10.1177/0956797620916786</pub-id><pub-id pub-id-type="pmid">32489141</pub-id></element-citation></ref><ref id="bib27"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Essau</surname><given-names>CA</given-names></name><name><surname>Lewinsohn</surname><given-names>PM</given-names></name><name><surname>Lim</surname><given-names>JX</given-names></name><name><surname>Ho</surname><given-names>MR</given-names></name><name><surname>Rohde</surname><given-names>P</given-names></name></person-group><year iso-8601-date="2018">2018</year><article-title>Incidence, recurrence and comorbidity of anxiety disorders in four major developmental stages</article-title><source>Journal of Affective Disorders</source><volume>228</volume><fpage>248</fpage><lpage>253</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2017.12.014</pub-id><pub-id pub-id-type="pmid">29304469</pub-id></element-citation></ref><ref id="bib28"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Feilong</surname><given-names>M</given-names></name><name><surname>Guntupalli</surname><given-names>JS</given-names></name><name><surname>Haxby</surname><given-names>JV</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>The neural basis of intelligence in fine-grained cortical topographies</article-title><source>eLife</source><volume>10</volume><elocation-id>e64058</elocation-id><pub-id pub-id-type="doi">10.7554/eLife.64058</pub-id><pub-id pub-id-type="pmid">33683205</pub-id></element-citation></ref><ref id="bib29"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Fisher</surname><given-names>AJ</given-names></name><name><surname>Medaglia</surname><given-names>JD</given-names></name><name><surname>Jeronimus</surname><given-names>BF</given-names></name></person-group><year iso-8601-date="2018">2018</year><article-title>Lack of group-to-individual generalizability is a threat to human subjects research</article-title><source>PNAS</source><volume>115</volume><fpage>E6106</fpage><lpage>E6115</lpage><pub-id pub-id-type="doi">10.1073/pnas.1711978115</pub-id><pub-id pub-id-type="pmid">29915059</pub-id></element-citation></ref><ref id="bib30"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Foa</surname><given-names>EB</given-names></name><name><surname>Grayson</surname><given-names>JB</given-names></name><name><surname>Steketee</surname><given-names>GS</given-names></name><name><surname>Doppelt</surname><given-names>HG</given-names></name><name><surname>Turner</surname><given-names>RM</given-names></name><name><surname>Latimer</surname><given-names>PR</given-names></name></person-group><year iso-8601-date="1983">1983</year><article-title>Success and failure in the behavioral treatment of obsessive-compulsives</article-title><source>Journal of Consulting and Clinical Psychology</source><volume>51</volume><fpage>287</fpage><lpage>297</lpage><pub-id pub-id-type="doi">10.1037//0022-006x.51.2.287</pub-id><pub-id pub-id-type="pmid">6841773</pub-id></element-citation></ref><ref id="bib31"><element-citation publication-type="web"><person-group person-group-type="author"><name><surname>Fox</surname><given-names>J</given-names></name><name><surname>Weisberg</surname><given-names>S</given-names></name></person-group><year iso-8601-date="2019">2019</year><article-title>An R companion to applied regression (Third)</article-title><ext-link ext-link-type="uri" 
xlink:href="https://socialsciences.mcmaster.ca/jfox/Books/Companion/">https://socialsciences.mcmaster.ca/jfox/Books/Companion/</ext-link><date-in-citation iso-8601-date="2020-05-17">May 17, 2020</date-in-citation></element-citation></ref><ref id="bib32"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Fox</surname><given-names>J</given-names></name><name><surname>Weisberg</surname><given-names>S</given-names></name><name><surname>Price</surname><given-names>B</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>CarData: companion to applied regression data sets</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=carData">https://CRAN.R-project.org/package=carData</ext-link></element-citation></ref><ref id="bib33"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fredrikson</surname><given-names>M</given-names></name><name><surname>Annas</surname><given-names>P</given-names></name><name><surname>Georgiades</surname><given-names>A</given-names></name><name><surname>Hursti</surname><given-names>T</given-names></name><name><surname>Tersman</surname><given-names>Z</given-names></name></person-group><year iso-8601-date="1993">1993</year><article-title>Internal consistency and temporal stability of classically conditioned skin conductance responses</article-title><source>Biological Psychology</source><volume>35</volume><fpage>153</fpage><lpage>163</lpage><pub-id pub-id-type="doi">10.1016/0301-0511(93)90011-v</pub-id><pub-id pub-id-type="pmid">8507744</pub-id></element-citation></ref><ref id="bib34"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Fröhner</surname><given-names>JH</given-names></name><name><surname>Teckentrup</surname><given-names>V</given-names></name><name><surname>Smolka</surname><given-names>MN</given-names></name><name><surname>Kroemer</surname><given-names>NB</given-names></name></person-group><year iso-8601-date="2019">2019</year><article-title>Addressing the reliability fallacy in fMRI: similar group effects may arise from unreliable individual effects</article-title><source>NeuroImage</source><volume>195</volume><fpage>174</fpage><lpage>189</lpage><pub-id pub-id-type="doi">10.1016/j.neuroimage.2019.03.053</pub-id><pub-id pub-id-type="pmid">30930312</pub-id></element-citation></ref><ref id="bib35"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fullana</surname><given-names>MA</given-names></name><name><surname>Harrison</surname><given-names>BJ</given-names></name><name><surname>Soriano-Mas</surname><given-names>C</given-names></name><name><surname>Vervliet</surname><given-names>B</given-names></name><name><surname>Cardoner</surname><given-names>N</given-names></name><name><surname>Àvila-Parcet</surname><given-names>A</given-names></name><name><surname>Radua</surname><given-names>J</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>Neural signatures of human fear conditioning: an updated and extended meta-analysis of fMRI studies</article-title><source>Molecular Psychiatry</source><volume>21</volume><fpage>500</fpage><lpage>508</lpage><pub-id pub-id-type="doi">10.1038/mp.2015.88</pub-id><pub-id pub-id-type="pmid">26122585</pub-id></element-citation></ref><ref id="bib36"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Fullana</surname><given-names>MA</given-names></name><name><surname>Dunsmoor</surname><given-names>JE</given-names></name><name><surname>Schruers</surname><given-names>KRJ</given-names></name><name><surname>Savage</surname><given-names>HS</given-names></name><name><surname>Bach</surname><given-names>DR</given-names></name><name><surname>Harrison</surname><given-names>BJ</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Human fear conditioning: from neuroscience to the clinic</article-title><source>Behaviour Research and Therapy</source><volume>124</volume><elocation-id>103528</elocation-id><pub-id pub-id-type="doi">10.1016/j.brat.2019.103528</pub-id></element-citation></ref><ref id="bib37"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Galatzer-Levy</surname><given-names>IR</given-names></name><name><surname>Bonanno</surname><given-names>GA</given-names></name><name><surname>Bush</surname><given-names>DEA</given-names></name><name><surname>LeDoux</surname><given-names>JE</given-names></name></person-group><year iso-8601-date="2013">2013a</year><article-title>Heterogeneity in threat extinction learning: substantive and methodological considerations for identifying individual difference in response to stress</article-title><source>Frontiers in Behavioral Neuroscience</source><volume>7</volume><elocation-id>55</elocation-id><pub-id pub-id-type="doi">10.3389/fnbeh.2013.00055</pub-id></element-citation></ref><ref id="bib38"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Galatzer-Levy</surname><given-names>IR</given-names></name><name><surname>Bryant</surname><given-names>RA</given-names></name></person-group><year iso-8601-date="2013">2013b</year><article-title>636,120 ways to have posttraumatic stress disorder</article-title><source>Perspectives on Psychological 
Science</source><volume>8</volume><fpage>651</fpage><lpage>662</lpage><pub-id pub-id-type="doi">10.1177/1745691613504115</pub-id><pub-id pub-id-type="pmid">26173229</pub-id></element-citation></ref><ref id="bib39"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gershman</surname><given-names>SJ</given-names></name><name><surname>Hartley</surname><given-names>CA</given-names></name></person-group><year iso-8601-date="2015">2015</year><article-title>Individual differences in learning predict the return of fear</article-title><source>Learning &amp; Behavior</source><volume>43</volume><fpage>243</fpage><lpage>250</lpage><pub-id pub-id-type="doi">10.3758/s13420-015-0176-z</pub-id><pub-id pub-id-type="pmid">26100524</pub-id></element-citation></ref><ref id="bib40"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Gohel</surname><given-names>D</given-names></name></person-group><year iso-8601-date="2021">2021</year><data-title>Flextable: functions for tabular reporting</data-title><version designator="0.8.1">0.8.1</version><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=flextable">https://CRAN.R-project.org/package=flextable</ext-link></element-citation></ref><ref id="bib41"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Gohel</surname><given-names>D</given-names></name><name><surname>Ross</surname><given-names>N</given-names></name></person-group><year iso-8601-date="2022">2022</year><data-title>officedown: Enhanced 'R Markdown' format for 'Word' and 'PowerPoint'</data-title><version designator="0.2.4">0.2.4</version><source>CRAN</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=officedown">https://CRAN.R-project.org/package=officedown</ext-link></element-citation></ref><ref id="bib42"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Graham</surname><given-names>BM</given-names></name><name><surname>Milad</surname><given-names>MR</given-names></name></person-group><year iso-8601-date="2011">2011</year><article-title>The study of fear extinction: implications for anxiety disorders</article-title><source>American Journal of Psychiatry</source><volume>168</volume><fpage>1255</fpage><lpage>1265</lpage><pub-id pub-id-type="doi">10.1176/appi.ajp.2011.11040557</pub-id><pub-id pub-id-type="pmid">21865528</pub-id></element-citation></ref><ref id="bib43"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Gromer</surname><given-names>D</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Apa: format outputs of statistical tests according to APA guidelines</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=apa">https://CRAN.R-project.org/package=apa</ext-link></element-citation></ref><ref id="bib44"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Gulliksen</surname><given-names>H</given-names></name></person-group><year iso-8601-date="1950">1950</year><chapter-title>Effect of group heterogeneity on test reliability</chapter-title><person-group person-group-type="editor"><name><surname>Gulliksen</surname><given-names>H</given-names></name></person-group><source>Theory of Mental Tests</source><publisher-loc>Hoboken, NJ</publisher-loc><publisher-name>Wiley</publisher-name><fpage>108</fpage><lpage>127</lpage></element-citation></ref><ref id="bib45"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Haaker</surname><given-names>J</given-names></name><name><surname>Golkar</surname><given-names>A</given-names></name><name><surname>Hermans</surname><given-names>D</given-names></name><name><surname>Lonsdorf</surname><given-names>TB</given-names></name></person-group><year iso-8601-date="2014">2014</year><article-title>A review on human reinstatement studies: an overview and methodological challenges</article-title><source>Learning &amp; Memory</source><volume>21</volume><fpage>424</fpage><lpage>440</lpage><pub-id pub-id-type="doi">10.1101/lm.036053.114</pub-id><pub-id pub-id-type="pmid">25128533</pub-id></element-citation></ref><ref id="bib46"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Harder</surname><given-names>JA</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>The multiverse of methods: extending the multiverse analysis to address data-collection decisions</article-title><source>Perspectives on Psychological Science</source><volume>15</volume><fpage>1158</fpage><lpage>1177</lpage><pub-id pub-id-type="doi">10.1177/1745691620917678</pub-id><pub-id pub-id-type="pmid">32598854</pub-id></element-citation></ref><ref id="bib47"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Häuser</surname><given-names>W</given-names></name><name><surname>Schmutzer</surname><given-names>G</given-names></name><name><surname>Brähler</surname><given-names>E</given-names></name><name><surname>Glaesmer</surname><given-names>H</given-names></name></person-group><year iso-8601-date="2011">2011</year><article-title>Maltreatment in childhood and adolescence: results from a survey of a representative sample of the German population</article-title><source>Deutsches Arzteblatt International</source><volume>108</volume><fpage>287</fpage><lpage>294</lpage><pub-id 
pub-id-type="doi">10.3238/arztebl.2011.0287</pub-id><pub-id pub-id-type="pmid">21629512</pub-id></element-citation></ref><ref id="bib48"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hayes</surname><given-names>AF</given-names></name><name><surname>Cai</surname><given-names>L</given-names></name></person-group><year iso-8601-date="2007">2007</year><article-title>Using heteroskedasticity-consistent standard error estimators in OLS regression: an introduction and software implementation</article-title><source>Behavior Research Methods</source><volume>39</volume><fpage>709</fpage><lpage>722</lpage><pub-id pub-id-type="doi">10.3758/bf03192961</pub-id><pub-id pub-id-type="pmid">18183883</pub-id></element-citation></ref><ref id="bib49"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hedge</surname><given-names>C</given-names></name><name><surname>Powell</surname><given-names>G</given-names></name><name><surname>Sumner</surname><given-names>P</given-names></name></person-group><year iso-8601-date="2018">2018</year><article-title>The reliability paradox: why robust cognitive tasks do not produce reliable individual differences</article-title><source>Behavior Research Methods</source><volume>50</volume><fpage>1166</fpage><lpage>1186</lpage><pub-id pub-id-type="doi">10.3758/s13428-017-0935-1</pub-id><pub-id pub-id-type="pmid">28726177</pub-id></element-citation></ref><ref id="bib50"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Herting</surname><given-names>MM</given-names></name><name><surname>Gautam</surname><given-names>P</given-names></name><name><surname>Chen</surname><given-names>Z</given-names></name><name><surname>Mezher</surname><given-names>A</given-names></name><name><surname>Vetter</surname><given-names>NC</given-names></name></person-group><year 
iso-8601-date="2018">2018</year><article-title>Test-Retest reliability of longitudinal task-based fMRI: implications for developmental studies</article-title><source>Developmental Cognitive Neuroscience</source><volume>33</volume><fpage>17</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.1016/j.dcn.2017.07.001</pub-id><pub-id pub-id-type="pmid">29158072</pub-id></element-citation></ref><ref id="bib51"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Infantolino</surname><given-names>ZP</given-names></name><name><surname>Luking</surname><given-names>KR</given-names></name><name><surname>Sauder</surname><given-names>CL</given-names></name><name><surname>Curtin</surname><given-names>JJ</given-names></name><name><surname>Hajcak</surname><given-names>G</given-names></name></person-group><year iso-8601-date="2018">2018</year><article-title>Robust is not necessarily reliable: from within-subjects fMRI contrasts to between-subjects comparisons</article-title><source>NeuroImage</source><volume>173</volume><fpage>146</fpage><lpage>152</lpage><pub-id pub-id-type="doi">10.1016/j.neuroimage.2018.02.024</pub-id><pub-id pub-id-type="pmid">29458188</pub-id></element-citation></ref><ref id="bib52"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kalisch</surname><given-names>R</given-names></name><name><surname>Korenfeld</surname><given-names>E</given-names></name><name><surname>Stephan</surname><given-names>KE</given-names></name><name><surname>Weiskopf</surname><given-names>N</given-names></name><name><surname>Seymour</surname><given-names>B</given-names></name><name><surname>Dolan</surname><given-names>RJ</given-names></name></person-group><year iso-8601-date="2006">2006</year><article-title>Context-Dependent human extinction memory is mediated by a ventromedial prefrontal and hippocampal network</article-title><source>The Journal of 
Neuroscience</source><volume>26</volume><fpage>9503</fpage><lpage>9511</lpage><pub-id pub-id-type="doi">10.1523/JNEUROSCI.2021-06.2006</pub-id><pub-id pub-id-type="pmid">16971534</pub-id></element-citation></ref><ref id="bib53"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Kassambara</surname><given-names>A</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Ggpubr: 'ggplot2' based publication ready plots</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=ggpubr">https://CRAN.R-project.org/package=ggpubr</ext-link></element-citation></ref><ref id="bib54"><element-citation publication-type="preprint"><person-group person-group-type="author"><name><surname>Kennedy</surname><given-names>JT</given-names></name><name><surname>Harms</surname><given-names>MP</given-names></name><name><surname>Korucuoglu</surname><given-names>O</given-names></name><name><surname>Astafiev</surname><given-names>SV</given-names></name><name><surname>Barch</surname><given-names>DM</given-names></name><name><surname>Thompson</surname><given-names>WK</given-names></name><name><surname>Bjork</surname><given-names>JM</given-names></name><name><surname>Anokhin</surname><given-names>AP</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Reliability and Stability Challenges in ABCD Task FMRI Data</article-title><source>bioRxiv</source><pub-id pub-id-type="doi">10.1101/2021.10.08.463750</pub-id></element-citation></ref><ref id="bib55"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kline</surname><given-names>P</given-names></name></person-group><year iso-8601-date="2013">2013</year><source>Handbook of Psychological Testing</source><publisher-name>Routledge</publisher-name><pub-id pub-id-type="doi">10.4324/9781315812274</pub-id></element-citation></ref><ref 
id="bib56"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kong</surname><given-names>R</given-names></name><name><surname>Yang</surname><given-names>Q</given-names></name><name><surname>Gordon</surname><given-names>E</given-names></name><name><surname>Xue</surname><given-names>A</given-names></name><name><surname>Yan</surname><given-names>X</given-names></name><name><surname>Orban</surname><given-names>C</given-names></name><name><surname>Zuo</surname><given-names>XN</given-names></name><name><surname>Spreng</surname><given-names>N</given-names></name><name><surname>Ge</surname><given-names>T</given-names></name><name><surname>Holmes</surname><given-names>A</given-names></name><name><surname>Eickhoff</surname><given-names>S</given-names></name><name><surname>Yeo</surname><given-names>BTT</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Individual-Specific areal-level parcellations improve functional connectivity prediction of behavior</article-title><source>Cerebral Cortex</source><volume>31</volume><fpage>4477</fpage><lpage>4500</lpage><pub-id pub-id-type="doi">10.1093/cercor/bhab101</pub-id><pub-id pub-id-type="pmid">33942058</pub-id></element-citation></ref><ref id="bib57"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Koo</surname><given-names>TK</given-names></name><name><surname>Li</surname><given-names>MY</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>A guideline of selecting and reporting intraclass correlation coefficients for reliability research</article-title><source>Journal of Chiropractic Medicine</source><volume>15</volume><fpage>155</fpage><lpage>163</lpage><pub-id pub-id-type="doi">10.1016/j.jcm.2016.02.012</pub-id><pub-id pub-id-type="pmid">27330520</pub-id></element-citation></ref><ref id="bib58"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Kozak</surname><given-names>MJ</given-names></name><name><surname>Foa</surname><given-names>EB</given-names></name><name><surname>Steketee</surname><given-names>G</given-names></name></person-group><year iso-8601-date="1988">1988</year><article-title>Process and outcome of exposure treatment with obsessive-compulsives: psychophysiological indicators of emotional processing</article-title><source>Behavior Therapy</source><volume>19</volume><fpage>157</fpage><lpage>169</lpage><pub-id pub-id-type="doi">10.1016/S0005-7894(88)80039-X</pub-id></element-citation></ref><ref id="bib59"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kragel</surname><given-names>PA</given-names></name><name><surname>Han</surname><given-names>X</given-names></name><name><surname>Kraynak</surname><given-names>TE</given-names></name><name><surname>Gianaros</surname><given-names>PJ</given-names></name><name><surname>Wager</surname><given-names>TD</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Functional MRI can be highly reliable, but it depends on what you measure: a commentary on Elliott et al. 
(2020)</article-title><source>Psychological Science</source><volume>32</volume><fpage>622</fpage><lpage>626</lpage><pub-id pub-id-type="doi">10.1177/0956797621989730</pub-id><pub-id pub-id-type="pmid">33685310</pub-id></element-citation></ref><ref id="bib60"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kriegeskorte</surname><given-names>N</given-names></name><name><surname>Mur</surname><given-names>M</given-names></name><name><surname>Bandettini</surname><given-names>P</given-names></name></person-group><year iso-8601-date="2008">2008</year><article-title>Representational similarity analysis-connecting the branches of systems neuroscience</article-title><source>Frontiers in Systems Neuroscience</source><volume>2</volume><elocation-id>4</elocation-id><pub-id pub-id-type="doi">10.3389/neuro.06.004.2008</pub-id><pub-id pub-id-type="pmid">19104670</pub-id></element-citation></ref><ref id="bib61"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kuhn</surname><given-names>M</given-names></name><name><surname>Gerlicher</surname><given-names>AMV</given-names></name><name><surname>Lonsdorf</surname><given-names>TB</given-names></name></person-group><year iso-8601-date="2022">2022</year><article-title>Navigating the manyverse of skin conductance response quantification approaches-a direct comparison of trough-to-peak, baseline correction, and model-based approaches in ledalab and pspm</article-title><source>Psychophysiology</source><volume>59</volume><elocation-id>e14058</elocation-id><pub-id pub-id-type="doi">10.1111/psyp.14058</pub-id><pub-id pub-id-type="pmid">35365863</pub-id></element-citation></ref><ref id="bib62"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Lang</surname><given-names>PJ</given-names></name><name><surname>Melamed</surname><given-names>BG</given-names></name><name><surname>Hart</surname><given-names>J</given-names></name></person-group><year iso-8601-date="1970">1970</year><article-title>A psychophysiological analysis of fear modification using an automated desensitization procedure</article-title><source>Journal of Abnormal Psychology</source><volume>76</volume><fpage>220</fpage><lpage>234</lpage><pub-id pub-id-type="doi">10.1037/h0029875</pub-id><pub-id pub-id-type="pmid">5483369</pub-id></element-citation></ref><ref id="bib63"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Levine</surname><given-names>DW</given-names></name><name><surname>Dunlap</surname><given-names>WP</given-names></name></person-group><year iso-8601-date="1982">1982</year><article-title>Power of the F test with skewed data: should one transform or not?</article-title><source>Psychological Bulletin</source><volume>92</volume><fpage>272</fpage><lpage>280</lpage><pub-id pub-id-type="doi">10.1037/0033-2909.92.1.272</pub-id></element-citation></ref><ref id="bib64"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lonsdorf</surname><given-names>TB</given-names></name><name><surname>Haaker</surname><given-names>J</given-names></name><name><surname>Kalisch</surname><given-names>R</given-names></name></person-group><year iso-8601-date="2014">2014</year><article-title>Long-Term expression of human contextual fear and extinction memories involves amygdala, hippocampus and ventromedial prefrontal cortex: a reinstatement study in two independent samples</article-title><source>Social Cognitive and Affective Neuroscience</source><volume>9</volume><fpage>1973</fpage><lpage>1983</lpage><pub-id pub-id-type="doi">10.1093/scan/nsu018</pub-id><pub-id 
pub-id-type="pmid">24493848</pub-id></element-citation></ref><ref id="bib65"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lonsdorf</surname><given-names>TB</given-names></name><name><surname>Menz</surname><given-names>MM</given-names></name><name><surname>Andreatta</surname><given-names>M</given-names></name><name><surname>Fullana</surname><given-names>MA</given-names></name><name><surname>Golkar</surname><given-names>A</given-names></name><name><surname>Haaker</surname><given-names>J</given-names></name><name><surname>Heitland</surname><given-names>I</given-names></name><name><surname>Hermann</surname><given-names>A</given-names></name><name><surname>Kuhn</surname><given-names>M</given-names></name><name><surname>Kruse</surname><given-names>O</given-names></name><name><surname>Meir Drexler</surname><given-names>S</given-names></name><name><surname>Meulders</surname><given-names>A</given-names></name><name><surname>Nees</surname><given-names>F</given-names></name><name><surname>Pittig</surname><given-names>A</given-names></name><name><surname>Richter</surname><given-names>J</given-names></name><name><surname>Römer</surname><given-names>S</given-names></name><name><surname>Shiban</surname><given-names>Y</given-names></name><name><surname>Schmitz</surname><given-names>A</given-names></name><name><surname>Straube</surname><given-names>B</given-names></name><name><surname>Vervliet</surname><given-names>B</given-names></name><name><surname>Wendt</surname><given-names>J</given-names></name><name><surname>Baas</surname><given-names>JMP</given-names></name><name><surname>Merz</surname><given-names>CJ</given-names></name></person-group><year iso-8601-date="2017">2017a</year><article-title>Don’t fear “fear conditioning”: methodological considerations for the design and analysis of studies on human fear acquisition, extinction, and return of fear</article-title><source>Neuroscience and Biobehavioral 
Reviews</source><volume>77</volume><fpage>247</fpage><lpage>285</lpage><pub-id pub-id-type="doi">10.1016/j.neubiorev.2017.02.026</pub-id><pub-id pub-id-type="pmid">28263758</pub-id></element-citation></ref><ref id="bib66"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lonsdorf</surname><given-names>TB</given-names></name><name><surname>Merz</surname><given-names>CJ</given-names></name></person-group><year iso-8601-date="2017">2017b</year><article-title>More than just noise: inter-individual differences in fear acquisition, extinction and return of fear in humans-biological, experiential, temperamental factors, and methodological pitfalls</article-title><source>Neuroscience and Biobehavioral Reviews</source><volume>80</volume><fpage>703</fpage><lpage>728</lpage><pub-id pub-id-type="doi">10.1016/j.neubiorev.2017.07.007</pub-id><pub-id pub-id-type="pmid">28764976</pub-id></element-citation></ref><ref id="bib67"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lonsdorf</surname><given-names>TB</given-names></name><name><surname>Klingelhöfer-Jens</surname><given-names>M</given-names></name><name><surname>Andreatta</surname><given-names>M</given-names></name><name><surname>Beckers</surname><given-names>T</given-names></name><name><surname>Chalkia</surname><given-names>A</given-names></name><name><surname>Gerlicher</surname><given-names>A</given-names></name><name><surname>Jentsch</surname><given-names>VL</given-names></name><name><surname>Meir Drexler</surname><given-names>S</given-names></name><name><surname>Mertens</surname><given-names>G</given-names></name><name><surname>Richter</surname><given-names>J</given-names></name><name><surname>Sjouwerman</surname><given-names>R</given-names></name><name><surname>Wendt</surname><given-names>J</given-names></name><name><surname>Merz</surname><given-names>CJ</given-names></name></person-group><year 
iso-8601-date="2019">2019a</year><article-title>Navigating the garden of forking paths for data exclusions in fear conditioning research</article-title><source>eLife</source><volume>8</volume><elocation-id>e52465</elocation-id><pub-id pub-id-type="doi">10.7554/eLife.52465</pub-id><pub-id pub-id-type="pmid">31841112</pub-id></element-citation></ref><ref id="bib68"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lonsdorf</surname><given-names>TB</given-names></name><name><surname>Merz</surname><given-names>CJ</given-names></name><name><surname>Fullana</surname><given-names>MA</given-names></name></person-group><year iso-8601-date="2019">2019b</year><article-title>Fear extinction retention: is it what we think it is?</article-title><source>Biological Psychiatry</source><volume>85</volume><fpage>1074</fpage><lpage>1082</lpage><pub-id pub-id-type="doi">10.1016/j.biopsych.2019.02.011</pub-id><pub-id pub-id-type="pmid">31005240</pub-id></element-citation></ref><ref id="bib69"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lonsdorf</surname><given-names>TB</given-names></name><name><surname>Gerlicher</surname><given-names>A</given-names></name><name><surname>Klingelhöfer-Jens</surname><given-names>M</given-names></name><name><surname>Krypotos</surname><given-names>AM</given-names></name></person-group><year iso-8601-date="2022">2022</year><article-title>Multiverse analyses in fear conditioning research</article-title><source>Behaviour Research and Therapy</source><volume>153</volume><elocation-id>104072</elocation-id><pub-id pub-id-type="doi">10.1016/j.brat.2022.104072</pub-id><pub-id pub-id-type="pmid">35500540</pub-id></element-citation></ref><ref id="bib70"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Lykken</surname><given-names>DT</given-names></name><name><surname>Venables</surname><given-names>PH</given-names></name></person-group><year iso-8601-date="1971">1971</year><article-title>Direct measurement of skin conductance: a proposal for standardization</article-title><source>Psychophysiology</source><volume>8</volume><fpage>656</fpage><lpage>672</lpage><pub-id pub-id-type="doi">10.1111/j.1469-8986.1971.tb00501.x</pub-id><pub-id pub-id-type="pmid">5116830</pub-id></element-citation></ref><ref id="bib71"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lykken</surname><given-names>DT</given-names></name></person-group><year iso-8601-date="1972">1972</year><article-title>Range correction applied to heart rate and to GSR data</article-title><source>Psychophysiology</source><volume>9</volume><fpage>373</fpage><lpage>379</lpage><pub-id pub-id-type="doi">10.1111/j.1469-8986.1972.tb03222.x</pub-id><pub-id pub-id-type="pmid">5034126</pub-id></element-citation></ref><ref id="bib72"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lynam</surname><given-names>DR</given-names></name><name><surname>Hoyle</surname><given-names>RH</given-names></name><name><surname>Newman</surname><given-names>JP</given-names></name></person-group><year iso-8601-date="2006">2006</year><article-title>The perils of partialling: cautionary tales from aggression and psychopathy</article-title><source>Assessment</source><volume>13</volume><fpage>328</fpage><lpage>341</lpage><pub-id pub-id-type="doi">10.1177/1073191106290562</pub-id><pub-id pub-id-type="pmid">16880283</pub-id></element-citation></ref><ref id="bib73"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Maitra</surname><given-names>R</given-names></name></person-group><year iso-8601-date="2010">2010</year><article-title>A re-defined and 
generalized percent-overlap-of-activation measure for studies of fmri reproducibility and its use in identifying outlier activation maps</article-title><source>NeuroImage</source><volume>50</volume><fpage>124</fpage><lpage>135</lpage><pub-id pub-id-type="doi">10.1016/j.neuroimage.2009.11.070</pub-id><pub-id pub-id-type="pmid">19963068</pub-id></element-citation></ref><ref id="bib74"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Månsson</surname><given-names>KNT</given-names></name><name><surname>Waschke</surname><given-names>L</given-names></name><name><surname>Manzouri</surname><given-names>A</given-names></name><name><surname>Furmark</surname><given-names>T</given-names></name><name><surname>Fischer</surname><given-names>H</given-names></name><name><surname>Garrett</surname><given-names>DD</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Moment-to-moment brain signal variability reliably predicts psychiatric treatment outcome</article-title><source>Biological Psychiatry</source><volume>91</volume><fpage>658</fpage><lpage>666</lpage><pub-id pub-id-type="doi">10.1016/j.biopsych.2021.09.026</pub-id><pub-id pub-id-type="pmid">34961621</pub-id></element-citation></ref><ref id="bib75"><element-citation publication-type="preprint"><person-group person-group-type="author"><name><surname>Marek</surname><given-names>S</given-names></name><name><surname>Tervo-Clemmens</surname><given-names>B</given-names></name><name><surname>Calabro</surname><given-names>FJ</given-names></name><name><surname>Montez</surname><given-names>DF</given-names></name><name><surname>Kay</surname><given-names>BP</given-names></name><name><surname>Hatoum</surname><given-names>AS</given-names></name><name><surname>Dosenbach</surname><given-names>NUF</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Towards Reproducible Brain-Wide Association 
Studies</article-title><source>bioRxiv</source><pub-id pub-id-type="doi">10.1101/2020.08.21.257758</pub-id></element-citation></ref><ref id="bib76"><element-citation publication-type="software"><person-group person-group-type="author"><collab>MATLAB</collab></person-group><year iso-8601-date="2016">2016</year><data-title>Matlab, natick</data-title><version designator="0.1">0.1</version><publisher-name>The MathWorks, Inc</publisher-name><ext-link ext-link-type="uri" xlink:href="https://www.mathworks.com/company/jobs/resources/locations/us-natick.html">https://www.mathworks.com/company/jobs/resources/locations/us-natick.html</ext-link></element-citation></ref><ref id="bib77"><element-citation publication-type="software"><person-group person-group-type="author"><collab>MATLAB</collab></person-group><year iso-8601-date="2019">2019</year><data-title>Matlab, sherborn</data-title><version designator="4.1">4.1</version><publisher-name>The MathWorks, Inc</publisher-name><ext-link ext-link-type="uri" xlink:href="https://www.indeed.com/jobs?q=The+Mathworks&amp;l=Sherborn,+MA&amp;redirected=1">https://www.indeed.com/jobs?q=The+Mathworks&amp;l=Sherborn,+MA&amp;redirected=1</ext-link></element-citation></ref><ref id="bib78"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Milad</surname><given-names>MR</given-names></name><name><surname>Wright</surname><given-names>CI</given-names></name><name><surname>Orr</surname><given-names>SP</given-names></name><name><surname>Pitman</surname><given-names>RK</given-names></name><name><surname>Quirk</surname><given-names>GJ</given-names></name><name><surname>Rauch</surname><given-names>SL</given-names></name></person-group><year iso-8601-date="2007">2007</year><article-title>Recall of fear extinction in humans activates the ventromedial prefrontal cortex and hippocampus in concert</article-title><source>Biological Psychiatry</source><volume>62</volume><fpage>446</fpage><lpage>454</lpage><pub-id 
pub-id-type="doi">10.1016/j.biopsych.2006.10.011</pub-id><pub-id pub-id-type="pmid">17217927</pub-id></element-citation></ref><ref id="bib79"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Milad</surname><given-names>MR</given-names></name><name><surname>Pitman</surname><given-names>RK</given-names></name><name><surname>Ellis</surname><given-names>CB</given-names></name><name><surname>Gold</surname><given-names>AL</given-names></name><name><surname>Shin</surname><given-names>LM</given-names></name><name><surname>Lasko</surname><given-names>NB</given-names></name><name><surname>Zeidan</surname><given-names>MA</given-names></name><name><surname>Handwerger</surname><given-names>K</given-names></name><name><surname>Orr</surname><given-names>SP</given-names></name><name><surname>Rauch</surname><given-names>SL</given-names></name></person-group><year iso-8601-date="2009">2009</year><article-title>Neurobiological basis of failure to recall extinction memory in posttraumatic stress disorder</article-title><source>Biological Psychiatry</source><volume>66</volume><fpage>1075</fpage><lpage>1082</lpage><pub-id pub-id-type="doi">10.1016/j.biopsych.2009.06.026</pub-id><pub-id pub-id-type="pmid">19748076</pub-id></element-citation></ref><ref id="bib80"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Milad</surname><given-names>MR</given-names></name><name><surname>Quirk</surname><given-names>GJ</given-names></name></person-group><year iso-8601-date="2012">2012</year><article-title>Fear extinction as a model for translational neuroscience: ten years of progress</article-title><source>Annual Review of Psychology</source><volume>63</volume><fpage>129</fpage><lpage>151</lpage><pub-id pub-id-type="doi">10.1146/annurev.psych.121208.131631</pub-id><pub-id pub-id-type="pmid">22129456</pub-id></element-citation></ref><ref id="bib81"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Moriarity</surname><given-names>DP</given-names></name><name><surname>Alloy</surname><given-names>LB</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Back to basics: the importance of measurement properties in biological psychiatry</article-title><source>Neuroscience and Biobehavioral Reviews</source><volume>123</volume><fpage>72</fpage><lpage>82</lpage><pub-id pub-id-type="doi">10.1016/j.neubiorev.2021.01.008</pub-id><pub-id pub-id-type="pmid">33497789</pub-id></element-citation></ref><ref id="bib82"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Müller</surname><given-names>K</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Here: a simpler way to find your files</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=here">https://CRAN.R-project.org/package=here</ext-link></element-citation></ref><ref id="bib83"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Myers</surname><given-names>KM</given-names></name><name><surname>Davis</surname><given-names>M</given-names></name></person-group><year iso-8601-date="2007">2007</year><article-title>Mechanisms of fear extinction</article-title><source>Molecular Psychiatry</source><volume>12</volume><fpage>120</fpage><lpage>150</lpage><pub-id pub-id-type="doi">10.1038/sj.mp.4001939</pub-id><pub-id pub-id-type="pmid">17160066</pub-id></element-citation></ref><ref id="bib84"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Ney</surname><given-names>LJ</given-names></name><name><surname>Laing</surname><given-names>PAF</given-names></name><name><surname>Steward</surname><given-names>T</given-names></name><name><surname>Zuj</surname><given-names>DV</given-names></name><name><surname>Dymond</surname><given-names>S</given-names></name><name><surname>Felmingham</surname><given-names>KL</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Inconsistent analytic strategies reduce robustness in fear extinction via skin conductance response</article-title><source>Psychophysiology</source><volume>57</volume><issue>11</issue><elocation-id>e13650</elocation-id><pub-id pub-id-type="doi">10.1111/psyp.13650</pub-id></element-citation></ref><ref id="bib85"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ney</surname><given-names>LJ</given-names></name><name><surname>Laing</surname><given-names>PAF</given-names></name><name><surname>Steward</surname><given-names>T</given-names></name><name><surname>Zuj</surname><given-names>DV</given-names></name><name><surname>Dymond</surname><given-names>S</given-names></name><name><surname>Harrison</surname><given-names>B</given-names></name><name><surname>Graham</surname><given-names>B</given-names></name><name><surname>Felmingham</surname><given-names>KL</given-names></name></person-group><year iso-8601-date="2022">2022</year><article-title>Methodological implications of sample size and extinction gradient on the robustness of fear conditioning across different analytic strategies</article-title><source>PLOS ONE</source><volume>17</volume><elocation-id>e0268814</elocation-id><pub-id pub-id-type="doi">10.1371/journal.pone.0268814</pub-id><pub-id pub-id-type="pmid">35609058</pub-id></element-citation></ref><ref id="bib86"><element-citation publication-type="journal"><person-group
person-group-type="author"><name><surname>Nieuwenhuys</surname><given-names>R</given-names></name></person-group><year iso-8601-date="2012">2012</year><article-title>The insular cortex</article-title><source>Progress in Brain Research</source><volume>195</volume><fpage>123</fpage><lpage>163</lpage><pub-id pub-id-type="doi">10.1016/B978-0-444-53860-4.00007-6</pub-id></element-citation></ref><ref id="bib87"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Noble</surname><given-names>S</given-names></name><name><surname>Scheinost</surname><given-names>D</given-names></name><name><surname>Constable</surname><given-names>RT</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>A guide to the measurement and interpretation of fMRI test-retest reliability</article-title><source>Current Opinion in Behavioral Sciences</source><volume>40</volume><fpage>27</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1016/j.cobeha.2020.12.012</pub-id><pub-id pub-id-type="pmid">33585666</pub-id></element-citation></ref><ref id="bib88"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nosek</surname><given-names>BA</given-names></name><name><surname>Ebersole</surname><given-names>CR</given-names></name><name><surname>DeHaven</surname><given-names>AC</given-names></name><name><surname>Mellor</surname><given-names>DT</given-names></name></person-group><year iso-8601-date="2018">2018</year><article-title>The preregistration revolution</article-title><source>PNAS</source><volume>115</volume><fpage>2600</fpage><lpage>2606</lpage><pub-id pub-id-type="doi">10.1073/pnas.1708274114</pub-id><pub-id pub-id-type="pmid">29531091</pub-id></element-citation></ref><ref id="bib89"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Parsons</surname><given-names>S</given-names></name><name><surname>Kruijt</surname><given-names>AW</given-names></name><name><surname>Fox</surname><given-names>E</given-names></name></person-group><year iso-8601-date="2019">2019</year><article-title>Psychological science needs a standard practice of reporting the reliability of cognitive-behavioral measurements</article-title><source>Advances in Methods and Practices in Psychological Science</source><volume>2</volume><fpage>378</fpage><lpage>395</lpage><pub-id pub-id-type="doi">10.1177/2515245919879695</pub-id></element-citation></ref><ref id="bib90"><element-citation publication-type="preprint"><person-group person-group-type="author"><name><surname>Parsons</surname><given-names>S</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Exploring Reliability Heterogeneity with Multiverse Analyses: Data Processing Decisions Unpredictably Influence Measurement Reliability</article-title><source>PsyArXiv</source><pub-id pub-id-type="doi">10.31234/osf.io/y6tcz</pub-id></element-citation></ref><ref id="bib91"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Pedersen</surname><given-names>TL</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Patchwork: the composer of plots</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=patchwork">https://CRAN.R-project.org/package=patchwork</ext-link></element-citation></ref><ref id="bib92"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Pitman</surname><given-names>RK</given-names></name><name><surname>Orr</surname><given-names>SP</given-names></name><name><surname>Altman</surname><given-names>B</given-names></name><name><surname>Longpre</surname><given-names>RE</given-names></name><name><surname>Poiré</surname><given-names>RE</given-names></name><name><surname>Macklin</surname><given-names>ML</given-names></name><name><surname>Michaels</surname><given-names>MJ</given-names></name><name><surname>Steketee</surname><given-names>GS</given-names></name></person-group><year iso-8601-date="1996">1996</year><article-title>Emotional processing and outcome of imaginal flooding therapy in Vietnam veterans with chronic posttraumatic stress disorder</article-title><source>Comprehensive Psychiatry</source><volume>37</volume><fpage>409</fpage><lpage>418</lpage><pub-id pub-id-type="doi">10.1016/s0010-440x(96)90024-3</pub-id><pub-id pub-id-type="pmid">8932965</pub-id></element-citation></ref><ref id="bib93"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Plendl</surname><given-names>W</given-names></name><name><surname>Wotjak</surname><given-names>CT</given-names></name></person-group><year iso-8601-date="2010">2010</year><article-title>Dissociation of within- and between-session extinction of conditioned fear</article-title><source>The Journal of Neuroscience</source><volume>30</volume><fpage>4990</fpage><lpage>4998</lpage><pub-id pub-id-type="doi">10.1523/JNEUROSCI.6038-09.2010</pub-id><pub-id pub-id-type="pmid">20371819</pub-id></element-citation></ref><ref id="bib94"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Plichta</surname><given-names>MM</given-names></name><name><surname>Schwarz</surname><given-names>AJ</given-names></name><name><surname>Grimm</surname><given-names>O</given-names></name><name><surname>Morgen</surname><given-names>K</given-names></name><name><surname>Mier</surname><given-names>D</given-names></name><name><surname>Haddad</surname><given-names>L</given-names></name><name><surname>Gerdes</surname><given-names>ABM</given-names></name><name><surname>Sauer</surname><given-names>C</given-names></name><name><surname>Tost</surname><given-names>H</given-names></name><name><surname>Esslinger</surname><given-names>C</given-names></name><name><surname>Colman</surname><given-names>P</given-names></name><name><surname>Wilson</surname><given-names>F</given-names></name><name><surname>Kirsch</surname><given-names>P</given-names></name><name><surname>Meyer-Lindenberg</surname><given-names>A</given-names></name></person-group><year iso-8601-date="2012">2012</year><article-title>Test-Retest reliability of evoked BOLD signals from a cognitive-emotive fMRI test battery</article-title><source>NeuroImage</source><volume>60</volume><fpage>1746</fpage><lpage>1758</lpage><pub-id pub-id-type="doi">10.1016/j.neuroimage.2012.01.129</pub-id><pub-id pub-id-type="pmid">22330316</pub-id></element-citation></ref><ref id="bib95"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Plichta</surname><given-names>MM</given-names></name><name><surname>Grimm</surname><given-names>O</given-names></name><name><surname>Morgen</surname><given-names>K</given-names></name><name><surname>Mier</surname><given-names>D</given-names></name><name><surname>Sauer</surname><given-names>C</given-names></name><name><surname>Haddad</surname><given-names>L</given-names></name><name><surname>Tost</surname><given-names>H</given-names></name><name><surname>Esslinger</surname><given-names>C</given-names></name><name><surname>Kirsch</surname><given-names>P</given-names></name><name><surname>Schwarz</surname><given-names>AJ</given-names></name><name><surname>Meyer-Lindenberg</surname><given-names>A</given-names></name></person-group><year iso-8601-date="2014">2014</year><article-title>Amygdala habituation: a reliable fMRI phenotype</article-title><source>NeuroImage</source><volume>103</volume><fpage>383</fpage><lpage>390</lpage><pub-id pub-id-type="doi">10.1016/j.neuroimage.2014.09.059</pub-id><pub-id pub-id-type="pmid">25284303</pub-id></element-citation></ref><ref id="bib96"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Prenoveau</surname><given-names>JM</given-names></name><name><surname>Craske</surname><given-names>MG</given-names></name><name><surname>Liao</surname><given-names>B</given-names></name><name><surname>Ornitz</surname><given-names>EM</given-names></name></person-group><year iso-8601-date="2013">2013</year><article-title>Human fear conditioning and extinction: timing is everything…or is it?</article-title><source>Biological Psychology</source><volume>92</volume><fpage>59</fpage><lpage>68</lpage><pub-id pub-id-type="doi">10.1016/j.biopsycho.2012.02.005</pub-id><pub-id pub-id-type="pmid">22349998</pub-id></element-citation></ref><ref id="bib97"><element-citation publication-type="software"><person-group person-group-type="author"><collab>Presentation 
software</collab></person-group><year iso-8601-date="2010">2010</year><data-title>Presentation software</data-title><publisher-name>Neurobehavioral Systems, Inc</publisher-name><ext-link ext-link-type="uri" xlink:href="https://www.neurobs.com/">https://www.neurobs.com/</ext-link></element-citation></ref><ref id="bib98"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rachman</surname><given-names>S</given-names></name></person-group><year iso-8601-date="1989">1989</year><article-title>The return of fear: review and prospect</article-title><source>Clinical Psychology Review</source><volume>9</volume><fpage>147</fpage><lpage>168</lpage><pub-id pub-id-type="doi">10.1016/0272-7358(89)90025-1</pub-id></element-citation></ref><ref id="bib99"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rauch</surname><given-names>SAM</given-names></name><name><surname>Foa</surname><given-names>EB</given-names></name><name><surname>Furr</surname><given-names>JM</given-names></name><name><surname>Filip</surname><given-names>JC</given-names></name></person-group><year iso-8601-date="2004">2004</year><article-title>Imagery vividness and perceived anxious arousal in prolonged exposure treatment for PTSD</article-title><source>Journal of Traumatic Stress</source><volume>17</volume><fpage>461</fpage><lpage>465</lpage><pub-id pub-id-type="doi">10.1007/s10960-004-5794-8</pub-id><pub-id pub-id-type="pmid">15730064</pub-id></element-citation></ref><ref id="bib100"><element-citation publication-type="software"><person-group person-group-type="author"><collab>R Development Core Team</collab></person-group><year iso-8601-date="2020">2020</year><data-title>R: a language and environment for statistical computing</data-title><publisher-loc>Vienna, Austria</publisher-loc><publisher-name>R Foundation for Statistical Computing</publisher-name><ext-link ext-link-type="uri" 
xlink:href="https://www.r-project.org/index.html">https://www.r-project.org/index.html</ext-link></element-citation></ref><ref id="bib101"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Revelle</surname><given-names>W</given-names></name></person-group><year iso-8601-date="1979">1979</year><article-title>Hierarchical cluster analysis and the internal structure of tests</article-title><source>Multivariate Behavioral Research</source><volume>14</volume><fpage>57</fpage><lpage>74</lpage><pub-id pub-id-type="doi">10.1207/s15327906mbr1401_4</pub-id><pub-id pub-id-type="pmid">26766619</pub-id></element-citation></ref><ref id="bib102"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Revelle</surname><given-names>W</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Psych: procedures for psychological, psychometric, and personality research</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=psych">https://CRAN.R-project.org/package=psych</ext-link></element-citation></ref><ref id="bib103"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Ridderbusch</surname><given-names>IC</given-names></name><name><surname>Wroblewski</surname><given-names>A</given-names></name><name><surname>Yang</surname><given-names>Y</given-names></name><name><surname>Richter</surname><given-names>J</given-names></name><name><surname>Hollandt</surname><given-names>M</given-names></name><name><surname>Hamm</surname><given-names>AO</given-names></name><name><surname>Wittchen</surname><given-names>HU</given-names></name><name><surname>Ströhle</surname><given-names>A</given-names></name><name><surname>Arolt</surname><given-names>V</given-names></name><name><surname>Margraf</surname><given-names>J</given-names></name><name><surname>Lueken</surname><given-names>U</given-names></name><name><surname>Herrmann</surname><given-names>MJ</given-names></name><name><surname>Kircher</surname><given-names>T</given-names></name><name><surname>Straube</surname><given-names>B</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Neural adaptation of cingulate and insular activity during delayed fear extinction: a replicable pattern across assessment sites and repeated measurements</article-title><source>NeuroImage</source><volume>237</volume><elocation-id>118157</elocation-id><pub-id pub-id-type="doi">10.1016/j.neuroimage.2021.118157</pub-id><pub-id pub-id-type="pmid">34020017</pub-id></element-citation></ref><ref id="bib104"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Riley</surname><given-names>WT</given-names></name><name><surname>McCormick</surname><given-names>MGF</given-names></name><name><surname>Simon</surname><given-names>EM</given-names></name><name><surname>Stack</surname><given-names>K</given-names></name><name><surname>Pushkin</surname><given-names>Y</given-names></name><name><surname>Overstreet</surname><given-names>MM</given-names></name><name><surname>Carmona</surname><given-names>JJ</given-names></name><name><surname>Magakian</surname><given-names>C</given-names></name></person-group><year iso-8601-date="1995">1995</year><article-title>Effects of alprazolam dose on the induction and habituation processes during behavioral panic induction treatment</article-title><source>Journal of Anxiety Disorders</source><volume>9</volume><fpage>217</fpage><lpage>227</lpage><pub-id pub-id-type="doi">10.1016/0887-6185(95)00003-7</pub-id></element-citation></ref><ref id="bib105"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rothbaum</surname><given-names>BO</given-names></name><name><surname>Price</surname><given-names>M</given-names></name><name><surname>Jovanovic</surname><given-names>T</given-names></name><name><surname>Norrholm</surname><given-names>SD</given-names></name><name><surname>Gerardi</surname><given-names>M</given-names></name><name><surname>Dunlop</surname><given-names>B</given-names></name><name><surname>Davis</surname><given-names>M</given-names></name><name><surname>Bradley</surname><given-names>B</given-names></name><name><surname>Duncan</surname><given-names>EJ</given-names></name><name><surname>Rizzo</surname><given-names>A</given-names></name><name><surname>Ressler</surname><given-names>KJ</given-names></name></person-group><year iso-8601-date="2014">2014</year><article-title>A randomized, double-blind evaluation of D-cycloserine or alprazolam combined with virtual reality exposure therapy for posttraumatic 
stress disorder in Iraq and Afghanistan war veterans</article-title><source>The American Journal of Psychiatry</source><volume>171</volume><fpage>640</fpage><lpage>648</lpage><pub-id pub-id-type="doi">10.1176/appi.ajp.2014.13121625</pub-id><pub-id pub-id-type="pmid">24743802</pub-id></element-citation></ref><ref id="bib106"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Scharfenort</surname><given-names>R</given-names></name><name><surname>Menz</surname><given-names>M</given-names></name><name><surname>Lonsdorf</surname><given-names>TB</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>Adversity-induced relapse of fear: neural mechanisms and implications for relapse prevention from a study on experimentally induced return-of-fear following fear conditioning and extinction</article-title><source>Translational Psychiatry</source><volume>6</volume><elocation-id>e858</elocation-id><pub-id pub-id-type="doi">10.1038/tp.2016.126</pub-id><pub-id pub-id-type="pmid">27434492</pub-id></element-citation></ref><ref id="bib107"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schümann</surname><given-names>D</given-names></name><name><surname>Joue</surname><given-names>G</given-names></name><name><surname>Jordan</surname><given-names>P</given-names></name><name><surname>Bayer</surname><given-names>J</given-names></name><name><surname>Sommer</surname><given-names>T</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Test-Retest reliability of the emotional enhancement of memory</article-title><source>Memory</source><volume>28</volume><fpage>49</fpage><lpage>59</lpage><pub-id pub-id-type="doi">10.1080/09658211.2019.1679837</pub-id><pub-id pub-id-type="pmid">31612770</pub-id></element-citation></ref><ref id="bib108"><element-citation publication-type="book"><person-group 
person-group-type="author"><name><surname>Seel</surname><given-names>NM</given-names></name></person-group><year iso-8601-date="2012">2012</year><chapter-title>Rescorla-Wagner model</chapter-title><person-group person-group-type="editor"><name><surname>Seel</surname><given-names>NM</given-names></name></person-group><source>Encyclopedia of the Sciences of Learning</source><publisher-name>Springer US</publisher-name><pub-id pub-id-type="doi">10.1007/978-1-4419-1428-6_2377</pub-id></element-citation></ref><ref id="bib109"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shrout</surname><given-names>PE</given-names></name><name><surname>Fleiss</surname><given-names>JL</given-names></name></person-group><year iso-8601-date="1979">1979</year><article-title>Intraclass correlations: uses in assessing rater reliability</article-title><source>Psychological Bulletin</source><volume>86</volume><fpage>420</fpage><lpage>428</lpage><pub-id pub-id-type="doi">10.1037//0033-2909.86.2.420</pub-id><pub-id pub-id-type="pmid">18839484</pub-id></element-citation></ref><ref id="bib110"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shumake</surname><given-names>J</given-names></name><name><surname>Furgeson-Moreira</surname><given-names>S</given-names></name><name><surname>Monfils</surname><given-names>MH</given-names></name></person-group><year iso-8601-date="2014">2014</year><article-title>Predictability and heritability of individual differences in fear learning</article-title><source>Animal Cognition</source><volume>17</volume><fpage>1207</fpage><lpage>1221</lpage><pub-id pub-id-type="doi">10.1007/s10071-014-0752-1</pub-id><pub-id pub-id-type="pmid">24791664</pub-id></element-citation></ref><ref id="bib111"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Sjouwerman</surname><given-names>R</given-names></name><name><surname>Lonsdorf</surname><given-names>TB</given-names></name></person-group><year iso-8601-date="2019">2019</year><article-title>Latency of skin conductance responses across stimulus modalities</article-title><source>Psychophysiology</source><volume>56</volume><elocation-id>e13307</elocation-id><pub-id pub-id-type="doi">10.1111/psyp.13307</pub-id><pub-id pub-id-type="pmid">30461024</pub-id></element-citation></ref><ref id="bib112"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sjouwerman</surname><given-names>R</given-names></name><name><surname>Illius</surname><given-names>S</given-names></name><name><surname>Kuhn</surname><given-names>M</given-names></name><name><surname>Lonsdorf</surname><given-names>TB</given-names></name></person-group><year iso-8601-date="2022">2022</year><article-title>A data multiverse analysis investigating non-model based SCR quantification approaches</article-title><source>Psychophysiology</source><volume>59</volume><elocation-id>e14130</elocation-id><pub-id pub-id-type="doi">10.1111/psyp.14130</pub-id><pub-id pub-id-type="pmid">35780077</pub-id></element-citation></ref><ref id="bib113"><element-citation publication-type="journal"><person-group
person-group-type="author"><name><surname>Smits</surname><given-names>JAJ</given-names></name><name><surname>Rosenfield</surname><given-names>D</given-names></name><name><surname>Otto</surname><given-names>MW</given-names></name><name><surname>Marques</surname><given-names>L</given-names></name><name><surname>Davis</surname><given-names>ML</given-names></name><name><surname>Meuret</surname><given-names>AE</given-names></name><name><surname>Simon</surname><given-names>NM</given-names></name><name><surname>Pollack</surname><given-names>MH</given-names></name><name><surname>Hofmann</surname><given-names>SG</given-names></name></person-group><year iso-8601-date="2013">2013a</year><article-title>D-Cycloserine enhancement of exposure therapy for social anxiety disorder depends on the success of exposure sessions</article-title><source>Journal of Psychiatric Research</source><volume>47</volume><fpage>1455</fpage><lpage>1461</lpage><pub-id pub-id-type="doi">10.1016/j.jpsychires.2013.06.020</pub-id><pub-id pub-id-type="pmid">23870811</pub-id></element-citation></ref><ref id="bib114"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Smits</surname><given-names>JAJ</given-names></name><name><surname>Rosenfield</surname><given-names>D</given-names></name><name><surname>Otto</surname><given-names>MW</given-names></name><name><surname>Powers</surname><given-names>MB</given-names></name><name><surname>Hofmann</surname><given-names>SG</given-names></name><name><surname>Telch</surname><given-names>MJ</given-names></name><name><surname>Pollack</surname><given-names>MH</given-names></name><name><surname>Tart</surname><given-names>CD</given-names></name></person-group><year iso-8601-date="2013">2013b</year><article-title>D-Cycloserine enhancement of fear extinction is specific to successful exposure sessions: evidence from the treatment of height phobia</article-title><source>Biological 
Psychiatry</source><volume>73</volume><fpage>1054</fpage><lpage>1058</lpage><pub-id pub-id-type="doi">10.1016/j.biopsych.2012.12.009</pub-id><pub-id pub-id-type="pmid">23332511</pub-id></element-citation></ref><ref id="bib115"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Spearman</surname><given-names>C</given-names></name></person-group><year iso-8601-date="1910">1910</year><article-title>Correlation calculated from faulty data</article-title><source>British Journal of Psychology, 1904-1920</source><volume>3</volume><fpage>271</fpage><lpage>295</lpage><pub-id pub-id-type="doi">10.1111/j.2044-8295.1910.tb00206.x</pub-id></element-citation></ref><ref id="bib116"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Specht</surname><given-names>J</given-names></name><name><surname>Egloff</surname><given-names>B</given-names></name><name><surname>Schmukle</surname><given-names>SC</given-names></name></person-group><year iso-8601-date="2011">2011</year><article-title>Stability and change of personality across the life course: the impact of age and major life events on mean-level and rank-order stability of the big five</article-title><source>Journal of Personality and Social Psychology</source><volume>101</volume><fpage>862</fpage><lpage>882</lpage><pub-id pub-id-type="doi">10.1037/a0024950</pub-id><pub-id pub-id-type="pmid">21859226</pub-id></element-citation></ref><ref id="bib117"><element-citation publication-type="book"><person-group person-group-type="author"><name><surname>Spielberger</surname><given-names>CD</given-names></name></person-group><year iso-8601-date="1983">1983</year><source>Manual for the State-Trait Anxiety Inventory STAI</source><publisher-loc>Palo Alto, CA</publisher-loc><publisher-name>Mind Garden</publisher-name></element-citation></ref><ref id="bib118"><element-citation publication-type="journal"><person-group
person-group-type="author"><name><surname>Steegen</surname><given-names>S</given-names></name><name><surname>Tuerlinckx</surname><given-names>F</given-names></name><name><surname>Gelman</surname><given-names>A</given-names></name><name><surname>Vanpaemel</surname><given-names>W</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>Increasing transparency through a multiverse analysis</article-title><source>Perspectives on Psychological Science</source><volume>11</volume><fpage>702</fpage><lpage>712</lpage><pub-id pub-id-type="doi">10.1177/1745691616658637</pub-id><pub-id pub-id-type="pmid">27694465</pub-id></element-citation></ref><ref id="bib119"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Thomas</surname><given-names>DR</given-names></name><name><surname>Zumbo</surname><given-names>BD</given-names></name></person-group><year iso-8601-date="2012">2012</year><article-title>Difference scores from the point of view of reliability and repeated-measures ANOVA: in defense of difference scores for data analysis</article-title><source>Educational and Psychological Measurement</source><volume>72</volume><fpage>37</fpage><lpage>43</lpage><pub-id pub-id-type="doi">10.1177/0013164411409929</pub-id></element-citation></ref><ref id="bib120"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Tiedemann</surname><given-names>F</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Gghalves: compose half-half plots using your favourite geoms</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=gghalves">https://CRAN.R-project.org/package=gghalves</ext-link></element-citation></ref><ref id="bib121"><element-citation publication-type="software"><person-group 
person-group-type="author"><name><surname>Torchiano</surname><given-names>M</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Effsize: efficient effect size computation</data-title><source>Zenodo</source><ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.1480624">https://doi.org/10.5281/zenodo.1480624</ext-link></element-citation></ref><ref id="bib122"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Torrents-Rodas</surname><given-names>D</given-names></name><name><surname>Fullana</surname><given-names>MA</given-names></name><name><surname>Bonillo</surname><given-names>A</given-names></name><name><surname>Andión</surname><given-names>O</given-names></name><name><surname>Molinuevo</surname><given-names>B</given-names></name><name><surname>Caseras</surname><given-names>X</given-names></name><name><surname>Torrubia</surname><given-names>R</given-names></name></person-group><year iso-8601-date="2014">2014</year><article-title>Testing the temporal stability of individual differences in the acquisition and generalization of fear</article-title><source>Psychophysiology</source><volume>51</volume><fpage>697</fpage><lpage>705</lpage><pub-id pub-id-type="doi">10.1111/psyp.12213</pub-id><pub-id pub-id-type="pmid">24673651</pub-id></element-citation></ref><ref id="bib123"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Ushey</surname><given-names>K</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Renv: project environments</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=renv">https://CRAN.R-project.org/package=renv</ext-link></element-citation></ref><ref id="bib124"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Vaidya</surname><given-names>JG</given-names></name><name><surname>Gray</surname><given-names>EK</given-names></name><name><surname>Haig</surname><given-names>J</given-names></name><name><surname>Watson</surname><given-names>D</given-names></name></person-group><year iso-8601-date="2002">2002</year><article-title>On the temporal stability of personality: evidence for differential stability and the role of life experiences</article-title><source>Journal of Personality and Social Psychology</source><volume>83</volume><fpage>1469</fpage><lpage>1484</lpage><pub-id pub-id-type="doi">10.1037/0022-3514.83.6.1469</pub-id><pub-id pub-id-type="pmid">12500825</pub-id></element-citation></ref><ref id="bib125"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vervliet</surname><given-names>B</given-names></name><name><surname>Baeyens</surname><given-names>F</given-names></name><name><surname>Van den Bergh</surname><given-names>O</given-names></name><name><surname>Hermans</surname><given-names>D</given-names></name></person-group><year iso-8601-date="2013">2013a</year><article-title>Extinction, generalization, and return of fear: a critical review of renewal research in humans</article-title><source>Biological Psychology</source><volume>92</volume><fpage>51</fpage><lpage>58</lpage><pub-id pub-id-type="doi">10.1016/j.biopsycho.2012.01.006</pub-id><pub-id pub-id-type="pmid">22285129</pub-id></element-citation></ref><ref id="bib126"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vervliet</surname><given-names>B</given-names></name><name><surname>Craske</surname><given-names>MG</given-names></name><name><surname>Hermans</surname><given-names>D</given-names></name></person-group><year iso-8601-date="2013">2013b</year><article-title>Fear extinction and relapse: state of the art</article-title><source>Annual Review of Clinical 
Psychology</source><volume>9</volume><fpage>215</fpage><lpage>248</lpage><pub-id pub-id-type="doi">10.1146/annurev-clinpsy-050212-185542</pub-id><pub-id pub-id-type="pmid">23537484</pub-id></element-citation></ref><ref id="bib127"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Visser</surname><given-names>RM</given-names></name><name><surname>de Haan</surname><given-names>MIC</given-names></name><name><surname>Beemsterboer</surname><given-names>T</given-names></name><name><surname>Haver</surname><given-names>P</given-names></name><name><surname>Kindt</surname><given-names>M</given-names></name><name><surname>Scholte</surname><given-names>HS</given-names></name></person-group><year iso-8601-date="2016">2016</year><article-title>Quantifying learning-dependent changes in the brain: single-trial multivoxel pattern analysis requires slow event-related fMRI</article-title><source>Psychophysiology</source><volume>53</volume><fpage>1117</fpage><lpage>1127</lpage><pub-id pub-id-type="doi">10.1111/psyp.12665</pub-id><pub-id pub-id-type="pmid">27153295</pub-id></element-citation></ref><ref id="bib128"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Visser</surname><given-names>RM</given-names></name><name><surname>Bathelt</surname><given-names>J</given-names></name><name><surname>Scholte</surname><given-names>HS</given-names></name><name><surname>Kindt</surname><given-names>M</given-names></name></person-group><year iso-8601-date="2021">2021</year><article-title>Robust BOLD responses to faces but not to conditioned threat: challenging the amygdala’s reputation in human fear and extinction learning</article-title><source>The Journal of Neuroscience</source><volume>41</volume><fpage>10278</fpage><lpage>10292</lpage><pub-id pub-id-type="doi">10.1523/JNEUROSCI.0857-21.2021</pub-id><pub-id pub-id-type="pmid">34750227</pub-id></element-citation></ref><ref 
id="bib129"><element-citation publication-type="preprint"><person-group person-group-type="author"><name><surname>Werner</surname><given-names>F</given-names></name><name><surname>Klingelhöfer-Jens</surname><given-names>M</given-names></name><name><surname>Schümann</surname><given-names>D</given-names></name><name><surname>Gamer</surname><given-names>M</given-names></name><name><surname>Kalisch</surname><given-names>R</given-names></name><name><surname>Sommer</surname><given-names>T</given-names></name><name><surname>Lonsdorf</surname><given-names>TB</given-names></name></person-group><year iso-8601-date="2022">2022</year><article-title>Limited Temporal Stability of the Spielberger State-Trait Inventory over 3.5 Years</article-title><source>PsyArXiv</source><pub-id pub-id-type="doi">10.31234/osf.io/mubgv</pub-id></element-citation></ref><ref id="bib130"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wickham</surname><given-names>H</given-names></name></person-group><year iso-8601-date="2007">2007</year><article-title>Reshaping data with the reshape package</article-title><source>Journal of Statistical Software</source><volume>21</volume><fpage>1</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.18637/jss.v021.i12</pub-id></element-citation></ref><ref id="bib131"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Wickham</surname><given-names>H</given-names></name></person-group><year iso-8601-date="2016">2016</year><data-title>Elegant graphics for data analysis</data-title><version designator="3.3.5">3.3.5</version><source>Ggplot2</source><ext-link ext-link-type="uri" xlink:href="https://ggplot2.tidyverse.org">https://ggplot2.tidyverse.org</ext-link></element-citation></ref><ref id="bib132"><element-citation publication-type="software"><person-group 
person-group-type="author"><name><surname>Wickham</surname><given-names>H</given-names></name></person-group><year iso-8601-date="2019">2019</year><data-title>Stringr: simple, consistent wrappers for common string operations</data-title><version designator="1.4.1">1.4.1</version><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=stringr">https://CRAN.R-project.org/package=stringr</ext-link></element-citation></ref><ref id="bib133"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Wickham</surname><given-names>H</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Tidyr: tidy messy data</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=tidyr">https://CRAN.R-project.org/package=tidyr</ext-link></element-citation></ref><ref id="bib134"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Wickham</surname><given-names>H</given-names></name><name><surname>François</surname><given-names>R</given-names></name><name><surname>Henry</surname><given-names>L</given-names></name><name><surname>Müller</surname><given-names>K</given-names></name></person-group><year iso-8601-date="2021">2021</year><data-title>Dplyr: a grammar of data manipulation</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=dplyr">https://CRAN.R-project.org/package=dplyr</ext-link></element-citation></ref><ref id="bib135"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Wilke</surname><given-names>CO</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>Cowplot: streamlined plot theme and plot annotations for ‘ggplot2’</data-title><source>R-Project</source><ext-link ext-link-type="uri" 
xlink:href="https://CRAN.R-project.org/package=cowplot">https://CRAN.R-project.org/package=cowplot</ext-link></element-citation></ref><ref id="bib136"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Xie</surname><given-names>Y</given-names></name></person-group><year iso-8601-date="2015">2015</year><data-title>Dynamic documents with R and knitr</data-title><source>Yihui</source><ext-link ext-link-type="uri" xlink:href="https://yihui.org/knitr/">https://yihui.org/knitr/</ext-link></element-citation></ref><ref id="bib137"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xiong</surname><given-names>P</given-names></name><name><surname>Liu</surname><given-names>M</given-names></name><name><surname>Liu</surname><given-names>B</given-names></name><name><surname>Hall</surname><given-names>BJ</given-names></name></person-group><year iso-8601-date="2022">2022</year><article-title>Trends in the incidence and dalys of anxiety disorders at the global, regional, and national levels: estimates from the global burden of disease study 2019</article-title><source>Journal of Affective Disorders</source><volume>297</volume><fpage>83</fpage><lpage>93</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2021.10.022</pub-id><pub-id pub-id-type="pmid">34678404</pub-id></element-citation></ref><ref id="bib138"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yonkers</surname><given-names>KA</given-names></name><name><surname>Bruce</surname><given-names>SE</given-names></name><name><surname>Dyck</surname><given-names>IR</given-names></name><name><surname>Keller</surname><given-names>MB</given-names></name></person-group><year iso-8601-date="2003">2003</year><article-title>Chronicity, relapse, and illness—
course of panic disorder, social phobia, and generalized anxiety disorder: findings in men and women from 8 years of follow-up</article-title><source>Depression and Anxiety</source><volume>17</volume><fpage>173</fpage><lpage>179</lpage><pub-id pub-id-type="doi">10.1002/da.10106</pub-id><pub-id pub-id-type="pmid">12768651</pub-id></element-citation></ref><ref id="bib139"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zeidan</surname><given-names>MA</given-names></name><name><surname>Lebron‐Milad</surname><given-names>K</given-names></name><name><surname>Thompson‐Hollands</surname><given-names>J</given-names></name><name><surname>Im</surname><given-names>JJY</given-names></name><name><surname>Dougherty</surname><given-names>DD</given-names></name><name><surname>Holt</surname><given-names>DJ</given-names></name><name><surname>Orr</surname><given-names>SP</given-names></name><name><surname>Milad</surname><given-names>MR</given-names></name></person-group><year iso-8601-date="2012">2012</year><article-title>Test–retest reliability during fear acquisition and fear extinction in humans</article-title><source>CNS Neuroscience &amp; Therapeutics</source><volume>18</volume><fpage>313</fpage><lpage>317</lpage><pub-id pub-id-type="doi">10.1111/j.1755-5949.2011.00238.x</pub-id><pub-id pub-id-type="pmid">21592319</pub-id></element-citation></ref><ref id="bib140"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zeileis</surname><given-names>A</given-names></name><name><surname>Hothorn</surname><given-names>T</given-names></name></person-group><year iso-8601-date="2002">2002</year><article-title>Diagnostic checking in regression relationships</article-title><source>R News</source><volume>2</volume><fpage>7</fpage><lpage>10</lpage></element-citation></ref><ref id="bib141"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Zeileis</surname><given-names>A</given-names></name></person-group><year iso-8601-date="2004">2004</year><article-title>Econometric computing with HC and HAC covariance matrix estimators</article-title><source>Journal of Statistical Software</source><volume>11</volume><fpage>1</fpage><lpage>17</lpage><pub-id pub-id-type="doi">10.18637/jss.v011.i10</pub-id></element-citation></ref><ref id="bib142"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zeileis</surname><given-names>A</given-names></name><name><surname>Grothendieck</surname><given-names>G</given-names></name></person-group><year iso-8601-date="2005">2005</year><article-title>Zoo: S3 infrastructure for regular and irregular time series</article-title><source>Journal of Statistical Software</source><volume>14</volume><fpage>1</fpage><lpage>27</lpage><pub-id pub-id-type="doi">10.18637/jss.v014.i06</pub-id></element-citation></ref><ref id="bib143"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zeileis</surname><given-names>A</given-names></name></person-group><year iso-8601-date="2006">2006</year><article-title>Object-oriented computation of sandwich estimators</article-title><source>Journal of Statistical Software</source><volume>16</volume><fpage>1</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.18637/jss.v016.i09</pub-id></element-citation></ref><ref id="bib144"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zeileis</surname><given-names>A</given-names></name><name><surname>Köll</surname><given-names>S</given-names></name><name><surname>Graham</surname><given-names>N</given-names></name></person-group><year iso-8601-date="2020">2020</year><article-title>Various versatile variances: an object-oriented implementation of clustered covariances in R</article-title><source>Journal of Statistical 
Software</source><volume>95</volume><fpage>1</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.18637/jss.v095.i01</pub-id></element-citation></ref><ref id="bib145"><element-citation publication-type="software"><person-group person-group-type="author"><name><surname>Zhu</surname><given-names>H</given-names></name></person-group><year iso-8601-date="2020">2020</year><data-title>KableExtra: construct complex table with ‘kable’ and pipe syntax</data-title><source>R-Project</source><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=kableExtra">https://CRAN.R-project.org/package=kableExtra</ext-link></element-citation></ref><ref id="bib146"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zuo</surname><given-names>XN</given-names></name><name><surname>Xu</surname><given-names>T</given-names></name><name><surname>Milham</surname><given-names>MP</given-names></name></person-group><year iso-8601-date="2019">2019</year><article-title>Harnessing reliability for neuroscience research</article-title><source>Nature Human Behaviour</source><volume>3</volume><fpage>768</fpage><lpage>771</lpage><pub-id pub-id-type="doi">10.1038/s41562-019-0655-x</pub-id><pub-id pub-id-type="pmid">31253883</pub-id></element-citation></ref></ref-list></back><sub-article article-type="editor-report" id="sa0"><front-stub><article-id pub-id-type="doi">10.7554/eLife.78717.sa0</article-id><title-group><article-title>Editor's evaluation</article-title></title-group><contrib-group><contrib contrib-type="author"><name><surname>Shackman</surname><given-names>Alexander</given-names></name><role specific-use="editor">Reviewing Editor</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/047s2c258</institution-id><institution>University of Maryland</institution></institution-wrap><country>United States</country></aff></contrib></contrib-group><related-object id="sa0ro1" object-id-type="id" 
object-id="10.1101/2022.03.15.484434" link-type="continued-by" xlink:href="https://sciety.org/articles/activity/10.1101/2022.03.15.484434"/></front-stub><body><p>The authors assess the psychometric properties of behavioral, psychophysiological, and brain imaging measures of fear conditioning. Six-month retest reliability was generally low, whereas internal-consistency reliability was generally high. At the group level, reliability and criterion validity were generally good. Most measurements proved sensitive to data analytical choices. Results are framed within a larger discussion of the role of measurement properties in individual difference research and clinical translation and have the potential to serve as an important building block towards improvement in both these areas.</p></body></sub-article><sub-article article-type="decision-letter" id="sa1"><front-stub><article-id pub-id-type="doi">10.7554/eLife.78717.sa1</article-id><title-group><article-title>Decision letter</article-title></title-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Shackman</surname><given-names>Alexander</given-names></name><role>Reviewing Editor</role><aff><institution-wrap><institution-id institution-id-type="ror">https://ror.org/047s2c258</institution-id><institution>University of Maryland</institution></institution-wrap><country>United States</country></aff></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name><surname>Leknes</surname><given-names>Siri</given-names></name><role>Reviewer</role></contrib></contrib-group></front-stub><body><boxed-text id="sa2-box1"><p>Our editorial process produces two outputs: (i) <ext-link ext-link-type="uri" xlink:href="https://sciety.org/articles/activity/10.1101/2022.03.15.484434">public reviews</ext-link> designed to be posted alongside <ext-link ext-link-type="uri" xlink:href="https://www.biorxiv.org/content/10.1101/2022.03.15.484434v1">the preprint</ext-link> for the benefit of 
readers; (ii) feedback on the manuscript for the authors, including requests for revisions, shown below. We also include an acceptance summary that explains what the editors found interesting or important about the work.</p></boxed-text><p><bold>Decision letter after peer review:</bold></p><p>Thank you for submitting your article "Robust group- but limited individual-level (longitudinal) reliability and insights into cross-phases response prediction of conditioned fear" for consideration by <italic>eLife</italic>. Your article has been reviewed by 2 peer reviewers, and the evaluation has been overseen by Drs. Shackman (Reviewing Editor) and Baker (Senior Editor).</p><p>The reviewers highlighted several strengths of the manuscript.</p><p>– We very much liked the paper and appreciate the potentially important implications for the field.</p><p>– This is a very comprehensive and thoughtful effort</p><p>– The approach is thorough, with a range of analysis approaches, including within- and between-subjects similarity, the individual-level overlap of fMRI results, ICCs, and cross-sectional reliability. It is important to determine these values so that researchers can discard incorrect assumptions, such as the belief that threat responses at baseline can be predictive of treatment responses in patient populations.</p><p>– The conclusions of this work are largely supported by the data and methodological approach, and this is a good benchmark for the field.</p><p>– The overall approach is excellent and represents the vanguard of open science practices (preregistration, all materials freely available, documentation of analysis deviations, multiverse analyses, etc.).</p><p>– This comprehensive approach drives home the conclusion that specific analytic choices and researcher "degrees of freedom" can have sometimes drastic effects on fundamental measurement properties. 
I think this underlines what I view as the key contribution of this manuscript: empirically highlighting the need for the fear conditioning field to pay more attention to measurement properties.</p><p>– Going beyond standard associative measures of reliability (ICCs) is an important contribution of this work, as they allow the authors to comment on nuances of individual-difference reliability that are not possible with the coarser ICCs. In turn, this facilitates researchers in making more informed decisions regarding the design of fear conditioning tasks to assess individual differences.</p><p>– The fMRI results are a particular strength, as fMRI continues to be a common fear conditioning index, yet its measurement properties in this context remain critically understudied. The choice to use standard ICCs in conjunction with similarity approaches is particularly fruitful here, as in conjunction with overlap metrics we now have a much better appraisal of the different components of reliability in fMRI data – and potential explanations for differences between behavioral and fMRI reliabilities.</p><p>– The poor reliability identified by several of these approaches is likely to be of great importance to this large, translational field.</p><p>Nevertheless, several aspects of the manuscript somewhat diminished enthusiasm, as outlined below.</p><p>The reviewers have discussed their critiques with one another and the Reviewing Editor, who has drafted this to help you prepare a revised submission.</p><p>Major Revisions:</p><p>1. Terminology. In terms of aligning with the psychometric and measurement literature, and to assist in clarity, I suggest the authors consistently use established measurement terms whenever possible. For example, "cross-sectional validity" is more often referred to as internal consistency, and the cross-phase predictions are an example of criterion/predictive validity. 
As the fear conditioning field is relatively new to formal measurement theory, it would be helpful to have everyone "on the same page" as decades of prior measurement work.</p><p>2. Increase Accessibility for a Broad Scientific Audience. We strongly recommend a thorough rewrite of the introduction and discussion. Essentially, the paper needs to be substantially 'dumbed down' to become accessible to the broad readership it deserves. The introduction should start by explaining the rationale for choosing to use fear conditioning methods, why fear conditioning research is so important, and mention some key insights generated by this type of research. (Some of this is included and/or alluded to, but could be clarified further for readers not working with fear conditioning in humans).</p><p>3. Clarify the Study Design. In the introduction, clearly explain the study design and the kinds of the data included in the analyses, both in the text and using a diagram with a visual overview of a typical trajectory (conditioning and extinction phases) either at the individual or group level or both. Ideally include example results of all the kinds included here, SCR, ratings and fMRI data and at both individual and group level, to really give the reader a sense of what's gone into in your analyses. Relatedly: The authors should take care to clearly articulate why they chose to use using 1st point, endpoint and average to investigate cross-phase predictability in the manuscript. Also, why it makes sense to look at raw, log-transformed, ranked, non-ranked data etc.</p><p>4. Clarify the Significance of Individual Differences and Reliability</p><p>a. A bit more explanation for why individual-level measurement is important for addressing clinical individual differences would be appreciated. At present, it is described as important but the "why" is not really fleshed out. 
For example, content about heterogeneity in clinical presentations requiring individual-level metrics (as opposed to categorical diagnoses) could be useful here.</p><p>b. When explaining the importance of reliability and prediction, please try to give concrete examples. Imagine your reader is an eager but inexperienced grad student who might wish to embark upon fear conditioning research, but has no experience and needs guidance at every level. How many trials to include, what type of measures, how many participants, how large can effects be expected to be – the discussion should clarify how your analyses inform these decisions.</p><p>5. Clarify the Significance of "Group-Level" Reliability. The authors analyze group-level reliability and situate this as being improved in relation to individual-level reliability. I think the authors' explanation for the importance of group-level reliability is not fully fleshed out and at present, it is not clear what new information the field can take from the reported group-level reliabilities. At present there seems to be some circular logic here: group-level reliability is studied because group-level inferences are important, and they are important because they are studied. I am concerned that non-specialist readers might get the wrong impression from this and conclude that fear conditioning is only useful for group-level inference, as well as continue to perpetuate the paradox described so well in Hedge et al., 2018, and others (e.g. work from Russ Poldrack's group).</p><p>6. Internal Consistency Formula. The internal consistency (cross-sectional reliability) calculation used is not well-justified, and potentially needs additional parameters. It is not clear why the authors deviate from the internal consistency calculation described in Parson, Kruijt, and Fox et al., 2019, especially given that these procedures are used for other metrics elsewhere in the manuscript. 
I request that the authors either use the bias-corrected formula from Parson et al., 2019 or justify the use of the current calculation.</p><p>7. ROIs. For the fMRI analyses, the authors use an ROI approach based on prior studies of fear acquisition and extinction. The majority of the most consistently identified regions (as seen in meta-analyses, Fullana et al., 2016, 2018) are analyzed. However, it is not clear why other regions are omitted, particularly given meta-analytic evidence. Striatal regions and the thalamus are the most notable omissions. Further, a weakness is that functional ROIs in this study were based on peak coordinates from a handful of prior studies, instead of meta-analytically identified coordinates. As such, I do not think the authors present the strongest foundation for making conclusions about the reliability of fear conditioning fMRI Data. I request that the authors include additional ROIs for the canonical fear network, or justify why the particular ROIs that are currently reported were the only ones used. I also strongly suggest using meta-analytic coordinates to determine ROIs for these analyses.</p><p>8. Reliability and the Need for Nuance</p><p>a. The authors structure the manuscript around the premise that reliability is essential in conducting solid individual-differences science, which I agree with wholeheartedly. However, I think the authors rely on relatively arbitrary cut-offs for classifying reliability as good/poor/etc to an extent that is not warranted, particularly in the context of the Discussion, and it takes away from the impact of this effort. As the authors point out, these categorical cut-offs are more guidelines than strict rules, yet the manuscript is structured around the premise that individual-level reliability is problematically poor. 
Many cut-off recommendations are based on psychometric work on trait self-report measures that usually assume fewer determinants/sources of error than would be seen in neuroscience experiments, which in turn allows for larger ceilings for effect sizes and reliability. The current manuscript does not address this issue and what meaningful (as opposed to good) fear conditioning reliability is when moving away from the categorical cut-offs. In other words, is it possible that the authors actually observed "good" reliability in the context of fear conditioning work, and that this reliability is lower than other types of paradigms is just inherent to the construct being studied?</p><p>b. The framing of the manuscript could be adjusted, such that less emphasis is placed on arbitrary cut-off metrics and more about what these reliabilities mean in the context of fear conditioning paradigms. To be clear, I think the authors already address this to a degree in their Discussion, but perhaps need to go a step further and expand on the challenges of establishing reliability in this field and explicitly address why common cut-offs are perhaps not appropriate.</p><p>c. It may be more appropriate to use numbers rather than labels/benchmarks in the reporting of results in the Results section, i.e. reporting the r-value instead of "poor to questionable" etc.</p><p>9. Clarify Implications and Recommendations. The concrete implications of the research, and recommendations arising from it, should be clearly spelled out in the Abstract and Discussion, for the greatest utility.</p></body></sub-article><sub-article article-type="reply" id="sa2"><front-stub><article-id pub-id-type="doi">10.7554/eLife.78717.sa2</article-id><title-group><article-title>Author response</article-title></title-group></front-stub><body><disp-quote content-type="editor-comment"><p>Major Revisions:</p><p>1. Terminology. 
In terms of aligning with the psychometric and measurement literature, and to assist in clarity, I suggest the authors consistently use established measurement terms whenever possible. For example, "cross-sectional validity" is more often referred to as internal consistency, and the cross-phase predictions are an example of criterion/predictive validity. As the fear conditioning field is relatively new to formal measurement theory, it would be helpful to have everyone "on the same page" as decades of prior measurement work.</p></disp-quote><p>We agree with the importance of homogeneous terminology as outlined by the reviewer/editor. We use the term longitudinal reliability to provide an umbrella term for test-retest reliability with a long test-retest interval, which we compute through different approaches including ICCs, but also with relatively new reliability measures such as similarity and overlap. With this umbrella term we want to increase the understanding of the different measures and help the reader to keep track of them.</p><p>The term cross-sectional reliability was used to clearly discriminate this measure from longitudinal reliability. We agree, however, that it may be confusing to introduce a new term for internal consistency. In our revised manuscript, we follow the reviewers’/editors’ suggestion and use internal consistency throughout. We have also moved Table 1 to an earlier position in the manuscript as it assists in clarity by providing an overview of the different reliability types and detailed definitions as well as formulas (see also reviewer comment 11).</p><p>However, we do not fully agree with exchanging “cross-phase predictions” with “criterion or predictive validity”, because we do not want to validate one experimental phase against the other. Predictive validity in psychometrics is defined as “the extent to which a score on a scale (or test) predicts scores on some criterion measure” (cf. Cronbach and Meehl, 1955). 
For instance, a cognitive test for job performance would have <italic>predictive validity</italic> if the observed correlation between the test score and the performance rating by the company were statistically significant.</p><p>Rather, we investigate whether responses in earlier experimental phases can predict responses in later experimental phases – both of which cannot be expected to “measure the same thing”. This is of relevance, as in the literature it is often assumed that this is true without strong empirical support due to a lack of available studies on this topic and heterogeneous findings in the literature (see e.g. Lonsdorf et al., 2017, Lonsdorf et al., 2019). We hope the reviewer and editor agree with this reasoning.</p><p>Cronbach, L.J., and Meehl, P.E. (1955). Construct validity for psychological tests. Psychological Bulletin, 52, 281-302.</p><p>Lonsdorf, T. B., Menz, M. M., Andreatta, M., Fullana, M. A., Golkar, A., Haaker, J., … Merz, C. J. (2017). Don’t fear ’fear conditioning’: Methodological considerations for the design and analysis of studies on human fear acquisition, extinction, and return of fear. Neuroscience and Biobehavioral Reviews, 77, 247–285. https://doi.org/10.1016/j.neubiorev.2017.02.026</p><p>Lonsdorf, T. B., Merz, C. J., and Fullana, M. A. (2019). Fear Extinction Retention: Is It What We Think It Is? Biological Psychiatry, 85(12), 1074–1082.</p><disp-quote content-type="editor-comment"><p>2. Increase Accessibility for a Broad Scientific Audience. We strongly recommend a thorough rewrite of the introduction and discussion. Essentially, the paper needs to be substantially 'dumbed down' to become accessible to the broad readership it deserves. The introduction should start by explaining the rationale for choosing to use fear conditioning methods, why fear conditioning research is so important, and mention some key insights generated by this type of research. 
(Some of this is included and/or alluded to, but could be clarified further for readers not working with fear conditioning in humans).</p></disp-quote><p>We thank the reviewer/editor for raising these points and have restructured our introduction as suggested as well as edited the discussion accordingly. We hope that with these edits, we have made our manuscript more accessible to a wider audience. As the changes are extensive, we refrain from quoting them here.</p><disp-quote content-type="editor-comment"><p>3. Clarify the Study Design. In the introduction, clearly explain the study design and the kinds of the data included in the analyses, both in the text and using a diagram with a visual overview of a typical trajectory (conditioning and extinction phases) either at the individual or group level or both. Ideally include example results of all the kinds included here, SCR, ratings and fMRI data and at both individual and group level, to really give the reader a sense of what's gone into in your analyses.</p></disp-quote><p>We thank the reviewer/editor for pointing out that the study design and how the data went into our analyses were not sufficiently clear. We added a figure to the methods section which illustrates both the design and the calculations of our different measures. We opted for the calculations for the SCRs as an example because this is the outcome for which all calculations were performed.</p><disp-quote content-type="editor-comment"><p>Relatedly: The authors should take care to clearly articulate why they chose to use using 1st point, endpoint and average to investigate cross-phase predictability in the manuscript. Also, why it makes sense to look at raw, log-transformed, ranked, non-ranked data etc.</p></disp-quote><p>We thank the reviewer/editor for the suggestion to justify the specifications of our data even more clearly. 
We have added some explanations and justifications to the footnotes of Table 1 which we have moved to the end of the introduction.</p><disp-quote content-type="editor-comment"><p>4. Clarify the Significance of Individual Differences and Reliability</p><p>a. A bit more explanation for why individual-level measurement is important for addressing clinical individual differences would be appreciated. At present, it is described as important but the "why" is not really fleshed out. For example, content about heterogeneity in clinical presentations requiring individual-level metrics (as opposed to categorical diagnoses) could be useful here.</p></disp-quote><p>We thank the reviewer/editor for highlighting that this was not sufficiently clear. We added the following paragraph to the introduction:</p><p>“Hence, tackling clinical questions regarding individual prediction of symptom development or treatment outcome requires a shift towards and a validation of research methods tailored to individual differences – such as a focus on measurement reliability (Zuo, Xu, and Milham, 2019). This is a necessary precondition when striving for the long-term goal of developing individualized intervention and prevention programs. This relates to the pronounced symptomatic heterogeneity in symptom manifestations between individuals diagnosed with the same disorders (e.g. PTSD, Galatzer-Levy and Bryant, 2013) which is not captured in binary clinical diagnoses as two patients with the diagnosis PTSD may not share a single symptom (Galatzer-Levy and Bryant, 2013).”</p><disp-quote content-type="editor-comment"><p>b. When explaining the importance of reliability and prediction, please try to give concrete examples. Imagine your reader is an eager but inexperienced grad student who might wish to embark upon fear conditioning research, but has no experience and needs guidance at every level. 
How many trials to include, what type of measures, how many participants, how large can effects be expected to be – the discussion should clarify how your analyses inform these decisions.</p></disp-quote><p>We agree with the reviewer/editor that we have not fully exhausted our possibilities to derive recommendations from our findings. We have added a paragraph to the discussion to make explicit what recommendations can be derived from our work and where to go from here. For many of the questions mentioned in the comment, it is very difficult to provide strong empirically-based recommendations from this single study – even though we wholeheartedly agree and share the desire for clear guidance. Our work should be seen as a starting point to develop and refine such guidelines in the future. For instance, we cannot provide any guidance on how large effect sizes can be expected as this is highly dependent on the specific research question and study sample at stake. We, however, provide recommendations wherever possible and a clear outline for future work. We also refer to our previous methods-focused work for guidance (see e.g., Lonsdorf et al., 2017).</p><p>Lonsdorf, T. B., Menz, M. M., Andreatta, M., Fullana, M. A., Golkar, A., Haaker, J., Heitland, I., Hermann, A., Kuhn, M., Kruse, O., Meir Drexler, S., Meulders, A., Nees, F., Pittig, A., Richter, J., Römer, S., Shiban, Y., Schmitz, A., Straube, B., … Merz, C. J. (2017). Don’t fear „fear conditioning“: Methodological considerations for the design and analysis of studies on human fear acquisition, extinction, and return of fear. Neuroscience and Biobehavioral Reviews, 77, 247–285. 
https://doi.org/10.1016/j.neubiorev.2017.02.026</p><p>Example from discussion:</p><p>“While general recommendations and helpful discussions on the link between reliability and number of trials (Baker et al., 2021), statistical power (Parsons, 2020), maximally observable correlations (Parsons, 2020), sample and effect size (Hedge et al., 2018; Parsons, 2020) considerations exist, our results highlight the need for field and sub-discipline specific considerations. Our work allows for some initial recommendations and insights. First, we highlight the value of using multiple, more nuanced measures of reliability beyond traditional ICCs (i.e., similarity, overlap, Fröhner et al. (2019)) and second, the relation between number of trials and reliability in an experiment with a learning component (i.e., no increase in reliability with an increasing number of trials). Importantly, our work can also be understood as an empirically-based call for action, since more work is needed to allow for clear-cut recommendations, and as a starting point to develop and refine comprehensive guidelines in the future.”</p><disp-quote content-type="editor-comment"><p>5. Clarify the Significance of "Group-Level" Reliability. The authors analyze group-level reliability and situate this as being improved in relation to individual-level reliability. I think the authors' explanation for the importance of group-level reliability is not fully fleshed out and at present, it is not clear what new information the field can take from the reported group-level reliabilities. At present there seems to be some circular logic here: group-level reliability is studied because group-level inferences are important, and they are important because they are studied. 
I am concerned that non-specialist readers might get the wrong impression from this and conclude that fear conditioning is only useful for group-level inference, as well as continue to perpetuate the paradox described so well in Hedge et al., 2018, and others (e.g. work from Russ Poldrack's group).</p></disp-quote><p>We thank the reviewer/editor for pointing out to us that our justification for researching reliability at the group level fell short. We have made further points as to why we think this is important.</p><p>Introduction:</p><p>“To date, both clinical and experimental research using the fear conditioning paradigm have primarily focused on group-level, basic, general mechanisms such as the effect of experimental manipulations – which is important to investigate (Lonsdorf and Merz, 2017).”</p><p>“More precisely, longitudinal reliability at the group level indicates the extent to which responses averaged across the group as a whole are stable over time, which is important to establish when investigating basic, generic principles such as the impact of experimental manipulations. Even though it has to be acknowledged that the group average is not necessarily representative of any individual in the group and the same group average may arise from different and even opposite individual responses at both time points in the same group, group-level reliability is important to establish in addition to individual-level reliability. Group-level reliability is relevant not only to work focusing on the understanding of general, generic processes but also for questions about differences between two groups of individuals such as patients vs. controls (e.g., see meta-analyses of Cooper et al., 2022; Duits et al., 2015). Of note, many fear conditioning paradigms were initially developed to study general group-level processes and to elicit robust group effects (Lonsdorf and Merz, 2017). 
Hence it is important to investigate both group- and individual-level reliability given the challenges of attempts to employ cognitive tasks that were originally designed to produce robust group effects in individual difference research (Elliott et al., 2020; Hedge et al., 2018; Parsons, 2020; Parsons, Kruijt, and Fox, 2019).”</p><p>Discussion:</p><p>“First, the limited longitudinal individual-level reliability might indicate that the fear conditioning paradigm employed here – which is a rather strong paradigm with 100% reinforcement rate – may be better suited for investigations of group effects and to a lesser extent for individual difference questions – potentially due to limited variance between individuals (Hedge et al., 2018; Parsons, 2020; Parsons et al., 2019). However, high correlations seem to be possible in principle, as we can conclude from the robust internal consistency of SCRs that we observed. This speaks against a limited between-subject variance and a general impracticability of the paradigm for individual difference research. Hence we call for caution and warn against concluding from our report that fear conditioning and our outcome measures (SCRs, BOLD fMRI) are unreliable at the individual level.”</p><disp-quote content-type="editor-comment"><p>6. Internal Consistency Formula. The internal consistency (cross-sectional reliability) calculation used is not well-justified, and potentially needs additional parameters. It is not clear why the authors deviate from the internal consistency calculation described in Parson, Kruijt, and Fox et al., 2019, especially given that these procedures are used for other metrics elsewhere in the manuscript. I request that the authors either use the bias-corrected formula from Parson et al., 2019 or justify the use of the current calculation.</p></disp-quote><p>We very much appreciate the approach of Parsons et al. (2019) of robust estimation through permutation, in which data are multiple times (Parsons et al. 
(2019) recommend 5000 times as a minimum) randomly split into two halves, the reliability is estimated for each split and these estimates are averaged. In our case as the fear conditioning paradigm is a learning paradigm, however, we believe this approach cannot be applied without further consideration, since not every random division of the data makes sense. If, for example, the data from the acquisition phase were divided into halves, responses from the beginning and end of this phase would be used to determine reliability. However, because it is a learning experiment, these two halves probably do not measure exactly the same construct. Therefore, we decided to use the odd-even method because we believe that adjacent trials “measure something more similar” than random splits in this specific case (i.e., learning paradigm). We have included a brief justification in the manuscript:</p><p>“We considered the odd-even approach as the most appropriate since our paradigm constitutes a learning experiment and we suggest that adjacent trials measure a more similar construct compared to other possible splits of trials such as a split into halves or a large number of random splits as implemented in the permutation-based approach recommended by Parsons et al. (2019).”</p><disp-quote content-type="editor-comment"><p>7. ROIs. For the fMRI analyses, the authors use an ROI approach based on prior studies of fear acquisition and extinction. The majority of the most consistently identified regions (as seen in meta-analyses, Fullana et al., 2016, 2018) are analyzed. However, it is not clear why other regions are omitted, particularly given meta-analytic evidence. Striatal regions and the thalamus are the most notable omissions. Further, a weakness is that functional ROIs in this study were based on peak coordinates from a handful of prior studies, instead of meta-analytically identified coordinates. 
As such, I do not think the authors present the strongest foundation for making conclusions about the reliability of fear conditioning fMRI Data. I request that the authors include additional ROIs for the canonical fear network, or justify why the particular ROIs that are currently reported were the only ones used. I also strongly suggest using meta-analytic coordinates to determine ROIs for these analyses.</p></disp-quote><p>We thank the reviewer/editor for pointing out that our results and arguments could be strengthened by expanding the number of included ROIs to those identified in previous meta-analyses (Fullana et al., 2016, 2018). To accommodate this suggestion, we have added the caudate nucleus, the putamen, the pallidum, the nucleus accumbens as a proxy for the ventral striatum as well as the thalamus to the list of anatomically defined ROIs. Furthermore, we have created masks for the dACC and the dlPFC based on meta-analytically identified peak coordinates. For the vmPFC, the meta-analysis only identified a non-significant cluster with negative activation for CS+ > CS-, with its peak coordinates more anterior than the activation found in previous publications (see red box in <xref ref-type="fig" rid="sa2fig1">Author response image 1</xref> for our mask and crosshair for meta-analytic peak coordinates) (Kalisch et al., 2006, Milad et al., 2007). 
Since the involvement of the vmPFC in fear acquisition and especially extinction has still been shown in many publications, we wanted to include the ROI but based its location on previous studies.</p><fig id="sa2fig1" position="float"><label>Author response image 1.</label><graphic mimetype="image" mime-subtype="jpeg" xlink:href="elife-78717.xml.media/sa2-fig1.jpg"/></fig><p>Please note that the general pattern of results for all our measures of reliability has not changed with the addition of regions of interest and the masks based on peak coordinates identified in meta-analyses. An updated detailed methodological description can be found in the methods section (see pages 64 – 65) under ‘Regions of Interest’. The excerpt is also pasted below for your convenience.</p><p>“A total of 11 regions of interest (ROIs; i.e., bilateral anterior insula, amygdala, hippocampus, caudate nucleus, putamen, pallidum, nucleus accumbens [NAcc], thalamus, dorsal anterior cingulate cortex [dACC], dorsolateral prefrontal cortex [dlPFC] and ventromedial prefrontal cortex [vmPFC]) were included in the current study. Amygdala, hippocampus, caudate nucleus, putamen, pallidum, ventral striatum (i.e., nucleus accumbens) and thalamus anatomical masks were extracted from the Harvard-Oxford atlas (Desikan et al., 2006) at a maximum probability threshold of 0.5. The anterior insula was defined as the overlap between the thresholded anatomical mask from the Harvard Oxford atlas (threshold: 0.5) and a box of size 60 x 30 x 60 mm centered around MNIxyz = 0, 30, 0 based on anatomical subdivisions (Nieuwenhuys, 2012). The cortical ROI dlPFC and dACC were created by building a box of size 20 x 16 x 16 mm around peak voxels obtained in a meta-analysis (with the x coordinate set to 0 for the dACC) (left dlPFC: MNIxyz = -36, 44, 22, right dlPFC: MNIxyz = 34, 44, 32, dACC: MNIxyz = 0, 18, 42, Fullana et al., 2016). 
As previously reported (Lonsdorf, Haaker, and Kalisch, 2014), the cortical vmPFC was created by using a box of size 20 x 16 x 16 mm centered on peak coordinates identified in prior studies of fear learning (vmPFC: MNIxyz = 0, 40, -12, e.g., Kalisch et al. (2006), Milad et al. (2007)) with the x coordinate set to 0 to obtain masks symmetric around the midline.”</p><p>We have further adjusted Appendix 3-table 3, Figure 3, Table 2 and Figure 6 and have slightly altered the results description to reflect the updated results.</p><p>“For BOLD fMRI, both ICC-types suggest rather limited reliability for CS discrimination during acquisition (both ICC<sub>abs</sub> and ICC<sub>con</sub> = 0.17) and extinction training (both ICC<sub>abs</sub> and ICC<sub>con</sub> = 0.01). For individual ROIs (anterior insula, amygdala, hippocampus, caudate nucleus, putamen, pallidum, nucleus accumbens, thalamus, dACC, dlPFC and vmPFC), ICCs were even lower (all ICCs ≤ 0.001; for full results see Appendix 3-table 3).”</p><p>“In contrast to what was observed for SCRs, within-subject similarity was significantly higher than between-subject similarity in the whole brain (<italic>p</italic> &lt;.001) and most of the ROIs for fear acquisition training (see Figure 3A and Appendix 4-table 1). This suggests that while absolute values for similarity might be low, individual brain activation patterns during fear acquisition training at T0 were – at large – still more similar to the same subject’s activation pattern at T1 than to any others at T1. 
For extinction training, however, no significant differences between within- and between-subject similarity were found for any ROI or the whole brain (all <italic>p</italic>’s >.306; see Figure 3B and Appendix 4-table 1).”</p><p>“In stark contrast to the low overlap of individual-level activation (see Table 2A), the overlap at the group level was rather high with 62.00 % for the whole brain and up to 89.80 % for ROIs (i.e., dACC and dlPFC; Jaccard) for CS discrimination during acquisition training (see Table 2B). Similar to what was observed for overlap at the individual level, a much lower overlap for extinction training as compared to acquisition training was observed for the whole brain (5.70 % overlap) and all ROIs (all close to zero).”</p><p>“In short, all but one association (CS discrimination in the NAcc) was positive, showing that higher BOLD response during acquisition was associated with higher BOLD responding during extinction training (see Figure 6). However, the standardized β coefficients are mostly below or around 0.3 except for CS+ associations in the dACC, indicating non-substantial associations for all ROIs and CS specifications that were near absent for CS discrimination. Analysis of CS+ and CS- data was included here as the analysis is based on β maps and not T-maps (as in previous analyses) where a contrast against baseline is not optimal.”</p><disp-quote content-type="editor-comment"><p>8. Reliability and the Need for Nuance</p><p>a. The authors structure the manuscript around the premise that reliability is essential in conducting solid individual-differences science, which I agree with wholeheartedly. However, I think the authors rely on relatively arbitrary cut-offs for classifying reliability as good/poor/etc to an extent that is not warranted, particularly in the context of the Discussion, and it takes away from the impact of this effort. 
As the authors point out, these categorical cut-offs are more guidelines than strict rules, yet the manuscript is structured around the premise that individual-level reliability is problematically poor. Many cut-off recommendations are based on psychometric work on trait self-report measures that usually assume fewer determinants/sources of error than would be seen in neuroscience experiments, which in turn allows for larger ceilings for effect sizes and reliability. The current manuscript does not address this issue and what meaningful (as opposed to good) fear conditioning reliability is when moving away from the categorical cut-offs. In other words, is it possible that the authors actually observed "good" reliability in the context of fear conditioning work, and that this reliability is lower than other types of paradigms is just inherent to the construct being studied?</p><p>b. The framing of the manuscript could be adjusted, such that less emphasis is placed on arbitrary cut-off metrics and more about what these reliabilities mean in the context of fear conditioning paradigms. To be clear, I think the authors already address this to a degree in their Discussion, but perhaps need to go a step further and expand on the challenges of establishing reliability in this field and explicitly address why common cut-offs are perhaps not appropriate.</p><p>c. It may be more appropriate to use numbers rather than labels/benchmarks in the reporting of results in the Results section, i.e. reporting the r-value instead of "poor to questionable" etc.</p></disp-quote><p>We agree with the reviewer/editor that a focus on the categorical and admittedly arbitrary cut offs may be misleading. We have replaced these (i.e., poor, moderate, high…) with numerical values for reliability in the Results section and explicitly pointed out that these are benchmarks that were developed in a different context and should not be overinterpreted (see figure captions). 
Changes have been made throughout the manuscript (some examples included below) and a new paragraph was added to the discussion in which we address the points raised by the reviewer/editor:</p><p>Abstract:</p><p>“While longitudinal reliability was rather limited at the individual level, it was comparably higher for acquisition but not extinction at the group-level.”</p><p>Example from figure caption:</p><p>“Internal consistency is in the literature often interpreted using benchmarks (Kline, 2013) for unacceptable (&lt; 0.5), poor (> 0.5 but &lt; 0.6), questionable (> 0.6 but &lt; 0.7), acceptable (> 0.7 but &lt; 0.8), good (> 0.8 but &lt; 0.9) and excellent (≥0.9). Common benchmarks in the literature for ICCs are poor (&lt; 0.5), moderate (> 0.5 but &lt; 0.75), good (> 0.75 but &lt; 0.9) and excellent (≥0.9) (Koo and Li, 2016). These benchmarks are included here to provide a frame of reference but we point out that these benchmarks are arbitrary and most importantly derived from psychometric work on trait self-report measures and should hence not be overinterpreted in the context of responding in experimental paradigms in which more sources of potential error are at play (Parsons, 2020).”</p><p>Example from results:</p><p>“Internal consistency at T0 (see Figure 1A) and T1 (see Figure 1B) of raw SCRs to the CS+ and CS- ranged from 0.54 – 0.85 and for raw SCRs to the US from 0.91 – 0.94 for all phases. In comparison, internal consistency was lower for CS discrimination with values ranging from -0.01 – 0.60.”</p><p>Example from discussion:</p><p>“Yet, we would like to point out that the values we report may in fact point towards good and not limited longitudinal individual-level reliability as our interpretation is guided by benchmarks that were not developed for experimental data but from psychometric work on trait self-report measures. 
We acknowledge that the upper bound of maximally observable reliability may differ between both use cases as more sources of error are at play in experimental neuroscientific work. The problem remains that predictions in fear conditioning paradigms do not seem feasible for a longer period of time (~ 6 months) given the measures we used here. Thus, a key contribution of our work is that it empirically highlights the need to pay more attention to measurement properties in translational research in general and fear conditioning research specifically (e.g., implement reliability calculations routinely in future studies). To date, it remains an open question what “good reliability” in experimental neuroscientific work actually means (Parsons et al., 2019).”</p><disp-quote content-type="editor-comment"><p>9. Clarify Implications and Recommendations. The concrete implications of the research, and recommendations arising from it, should be clearly spelled out in the Abstract and Discussion, for the greatest utility.</p></disp-quote><p>Please see our answer to comment 4b.</p></body></sub-article></article>