Function for automatically combining concepts by correlation mapping
Source: R/combine_concepts.R
automaticCorrelationCombineConcepts.Rd
Function for automatically combining concepts by correlation mapping
Usage
automaticCorrelationCombineConcepts(
data,
abstractionLevel = -1,
minCorrelation = 0.7,
maxDaysInBetween = 1,
heritageDriftAllowed = FALSE
)
Arguments
- data
CohortContrastObject
- abstractionLevel
abstraction level to use for mapping
- minCorrelation
minimum correlation to use for automatic concept combining
- maxDaysInBetween
maximum days between concepts to use for automatic concept combining
- heritageDriftAllowed
boolean for allowing heritage drift (combining concepts from differing heritages)
Value
A CohortContrastObject with correlation-based concept merges applied. The returned object keeps the same overall structure as the input, while updating the patient-, feature-, and cohort-level tables together with the complementary mapping table to reflect the executed correlation mappings.
Examples
study <- structure(
list(
data_initial = data.frame(
COHORT_DEFINITION_ID = c(rep("target", 4), rep("control", 4)),
SUBJECT_ID = 1:8,
COHORT_START_DATE = as.Date(rep("2020-01-01", 8)),
COHORT_END_DATE = as.Date(rep("2020-01-10", 8))
),
data_patients = data.frame(
COHORT_DEFINITION_ID = c(
"target", "target", "target", "target", "target", "target",
"control", "control", "control"
),
PERSON_ID = c(1, 1, 2, 2, 3, 4, 5, 6, 7),
CONCEPT_ID = c(1, 2, 1, 2, 1, 2, 1, 2, 1),
CONCEPT_NAME = c(
"Concept A", "Concept B", "Concept A", "Concept B", "Concept A",
"Concept B", "Concept A", "Concept B", "Concept A"
),
HERITAGE = rep("drug_exposure", 9),
ABSTRACTION_LEVEL = rep(-1, 9),
PREVALENCE = rep(1, 9),
TIME_TO_EVENT = I(list(0, 1, 0, 1, 0, 1, 0, 1, 0))
),
data_features = data.frame(
CONCEPT_ID = c(1, 2),
CONCEPT_NAME = c("Concept A", "Concept B"),
ABSTRACTION_LEVEL = c(-1, -1),
TARGET_SUBJECT_COUNT = c(3, 3),
CONTROL_SUBJECT_COUNT = c(2, 1),
TIME_TO_EVENT = I(list(c(0, 0, 0), c(1, 1, 1))),
TARGET_SUBJECT_PREVALENCE = c(0.75, 0.75),
CONTROL_SUBJECT_PREVALENCE = c(0.5, 0.25),
PREVALENCE_DIFFERENCE_RATIO = c(1.5, 3),
CHI2Y = c(TRUE, TRUE),
CHI2Y_P_VALUE = c(0.1, 0.01),
LOGITTEST = c(FALSE, FALSE),
LOGITTEST_P_VALUE = c(1, 1),
HERITAGE = c("drug_exposure", "drug_exposure")
),
data_person = data.frame(
PERSON_ID = 1:8,
YEAR_OF_BIRTH = 1980:1987,
GENDER_CONCEPT_ID = c(8507, 8532, 8507, 8532, 8507, 8532, 8507, 8532)
),
complementaryMappingTable = data.frame(
CONCEPT_ID = integer(),
CONCEPT_NAME = character(),
NEW_CONCEPT_ID = integer(),
NEW_CONCEPT_NAME = character(),
ABSTRACTION_LEVEL = integer(),
TYPE = character()
),
config = list(
runChi2YTests = TRUE,
runLogitTests = FALSE,
presenceFilter = 0,
prevalenceCutOff = 0
)
),
class = "CohortContrastObject"
)
combined <- automaticCorrelationCombineConcepts(
study,
abstractionLevel = -1,
minCorrelation = 0.5,
maxDaysInBetween = 2
)
#> ! Automatic correlation mapping iteration 1
combined$data_features
#> CONCEPT_ID CONCEPT_NAME ABSTRACTION_LEVEL TARGET_SUBJECT_COUNT
#> 1 1 Concept A -1 3
#> 2 2 Concept B -1 3
#> CONTROL_SUBJECT_COUNT TIME_TO_EVENT TARGET_SUBJECT_PREVALENCE
#> 1 2 0, 0, 0 0.75
#> 2 1 1, 1, 1 0.75
#> CONTROL_SUBJECT_PREVALENCE PREVALENCE_DIFFERENCE_RATIO CHI2Y CHI2Y_P_VALUE
#> 1 0.50 1.5 TRUE 0.10
#> 2 0.25 3.0 TRUE 0.01
#> LOGITTEST LOGITTEST_P_VALUE HERITAGE
#> 1 FALSE 1 drug_exposure
#> 2 FALSE 1 drug_exposure
combined$complementaryMappingTable
#> [1] CONCEPT_ID CONCEPT_NAME NEW_CONCEPT_ID NEW_CONCEPT_NAME
#> [5] ABSTRACTION_LEVEL TYPE
#> <0 rows> (or 0-length row.names)