Skip to contents

Function for automatically combining concepts by hierarchy mapping

Usage

automaticCorrelationCombineConcepts(
  data,
  abstractionLevel = -1,
  minCorrelation = 0.7,
  maxDaysInBetween = 1,
  heritageDriftAllowed = FALSE
)

Arguments

data

CohortContrastObject

abstractionLevel

abstraction level to use for mapping

minCorrelation

minimum correlation to use for automatic concept combining

maxDaysInBetween

minimum days between concepts to use for automatic concept combining

heritageDriftAllowed

boolean for allowing heritage drift (combining concepts from differing heritages)

Value

A CohortContrastObject with correlation-based concept merges applied. The returned object keeps the same overall structure as the input, while updating the patient-, feature-, and cohort-level tables together with the complementary mapping table to reflect the executed correlation mappings.

Examples

study <- structure(
  list(
    data_initial = data.frame(
      COHORT_DEFINITION_ID = c(rep("target", 4), rep("control", 4)),
      SUBJECT_ID = 1:8,
      COHORT_START_DATE = as.Date(rep("2020-01-01", 8)),
      COHORT_END_DATE = as.Date(rep("2020-01-10", 8))
    ),
    data_patients = data.frame(
      COHORT_DEFINITION_ID = c(
        "target", "target", "target", "target", "target", "target",
        "control", "control", "control"
      ),
      PERSON_ID = c(1, 1, 2, 2, 3, 4, 5, 6, 7),
      CONCEPT_ID = c(1, 2, 1, 2, 1, 2, 1, 2, 1),
      CONCEPT_NAME = c(
        "Concept A", "Concept B", "Concept A", "Concept B", "Concept A",
        "Concept B", "Concept A", "Concept B", "Concept A"
      ),
      HERITAGE = rep("drug_exposure", 9),
      ABSTRACTION_LEVEL = rep(-1, 9),
      PREVALENCE = rep(1, 9),
      TIME_TO_EVENT = I(list(0, 1, 0, 1, 0, 1, 0, 1, 0))
    ),
    data_features = data.frame(
      CONCEPT_ID = c(1, 2),
      CONCEPT_NAME = c("Concept A", "Concept B"),
      ABSTRACTION_LEVEL = c(-1, -1),
      TARGET_SUBJECT_COUNT = c(3, 3),
      CONTROL_SUBJECT_COUNT = c(2, 1),
      TIME_TO_EVENT = I(list(c(0, 0, 0), c(1, 1, 1))),
      TARGET_SUBJECT_PREVALENCE = c(0.75, 0.75),
      CONTROL_SUBJECT_PREVALENCE = c(0.5, 0.25),
      PREVALENCE_DIFFERENCE_RATIO = c(1.5, 3),
      CHI2Y = c(TRUE, TRUE),
      CHI2Y_P_VALUE = c(0.1, 0.01),
      LOGITTEST = c(FALSE, FALSE),
      LOGITTEST_P_VALUE = c(1, 1),
      HERITAGE = c("drug_exposure", "drug_exposure")
    ),
    data_person = data.frame(
      PERSON_ID = 1:8,
      YEAR_OF_BIRTH = 1980:1987,
      GENDER_CONCEPT_ID = c(8507, 8532, 8507, 8532, 8507, 8532, 8507, 8532)
    ),
    complementaryMappingTable = data.frame(
      CONCEPT_ID = integer(),
      CONCEPT_NAME = character(),
      NEW_CONCEPT_ID = integer(),
      NEW_CONCEPT_NAME = character(),
      ABSTRACTION_LEVEL = integer(),
      TYPE = character()
    ),
    config = list(
      runChi2YTests = TRUE,
      runLogitTests = FALSE,
      presenceFilter = 0,
      prevalenceCutOff = 0
    )
  ),
  class = "CohortContrastObject"
)

combined <- automaticCorrelationCombineConcepts(
  study,
  abstractionLevel = -1,
  minCorrelation = 0.5,
  maxDaysInBetween = 2
)
#> ! Automatic correlation mapping iteration 1
combined$data_features
#>   CONCEPT_ID CONCEPT_NAME ABSTRACTION_LEVEL TARGET_SUBJECT_COUNT
#> 1          1    Concept A                -1                    3
#> 2          2    Concept B                -1                    3
#>   CONTROL_SUBJECT_COUNT TIME_TO_EVENT TARGET_SUBJECT_PREVALENCE
#> 1                     2       0, 0, 0                      0.75
#> 2                     1       1, 1, 1                      0.75
#>   CONTROL_SUBJECT_PREVALENCE PREVALENCE_DIFFERENCE_RATIO CHI2Y CHI2Y_P_VALUE
#> 1                       0.50                         1.5  TRUE          0.10
#> 2                       0.25                         3.0  TRUE          0.01
#>   LOGITTEST LOGITTEST_P_VALUE      HERITAGE
#> 1     FALSE                 1 drug_exposure
#> 2     FALSE                 1 drug_exposure
combined$complementaryMappingTable
#> [1] CONCEPT_ID        CONCEPT_NAME      NEW_CONCEPT_ID    NEW_CONCEPT_NAME 
#> [5] ABSTRACTION_LEVEL TYPE             
#> <0 rows> (or 0-length row.names)