Tesi di specializzazione

Author

Daniele Riccucci

Cleaning

Code
# data %>%
#   filter(age < 18 | is.na(age) | age > 100) %>%
#   select(study_id, age)
#    study_id       age
#    <chr>        <dbl>
#  1 2192017008      13
#  2 2840038946      17
#  3 2900134333      16
#  4 2019032888      17
#  5 2400029191      14
#  6 2692028597       0 -- fixed
#  7 2292024038       0 -- fixed
#  8 2931047341      14
#  9 2851043805      12
# 10 2981041559      17
# 11 2951036174      17
# 12 2891035628       1 -- fixed
# 13 2391033346      16
# 14 2731031508      12
# 15 2681025310      17
# 16 2170023623     167 -- fixed
# 17 1349042713      17
# 18 2019033052      14
# 19 1499.013.450    15
# 20 1049013802      17
# 21 1789.011.265    17

# data %>%
#   filter(is.na(sex.factor)) %>%
#   select(study_id, sex, sex.factor)
#   study_id     sex
#   <chr>        <dbl+lbl>
# 1 2180024326   NA -- fixed
# 2 2971033025   NA -- fixed
# 3 1039013496   NA -- fixed
# 4 2071039462   NA -- fixed
# 5 1399.049.496 NA -- fixed

# consistency check for previous abdominal surgery excluding Crohn
# data %>%
#   filter(
#     prev_abd_surg == 0 & (
#       prev_abd_surg_typ___1 == 1 |
#         prev_abd_surg_typ___2 == 1 |
#         prev_abd_surg_typ___3 == 1 |
#         prev_abd_surg_typ___4 == 1 |
#         prev_abd_surg_typ___5 == 1 |
#         prev_abd_surg_typ___6 == 1 |
#         prev_abd_surg_typ___7 == 1
#     )
#   ) %>%
#   select(
#     prev_abd_surg, # Previous abdominal surgery (EXCLUDING procedures for Crohn's)
#     prev_abd_surg_typ___1, # appendicectomy
#     prev_abd_surg_typ___2, # cholecystectomy
#     prev_abd_surg_typ___3, # gynecologic procedure (including C section)
#     prev_abd_surg_typ___4, # gastric surgery
#     prev_abd_surg_typ___5, # colon surgery
#     prev_abd_surg_typ___6, # hepatic surgery
#     prev_abd_surg_typ___7, # other
#   )
# all good

# consistency check for previous Crohn surgery
# data %>%
#   filter(
#     location_previous_crohn___10 == 1 & ( # no previous operations
#       location_previous_crohn___1 == 1 | # terminal ileum
#         location_previous_crohn___2 == 1 | # ileum
#         location_previous_crohn___3 == 1 | # jejenum
#         location_previous_crohn___4 == 1 | # right colon
#         location_previous_crohn___5 == 1 | # transverse colon
#         location_previous_crohn___6 == 1 | # left colon
#         location_previous_crohn___7 == 1 | # rectum
#         location_previous_crohn___8 == 1 | # perianal
#         location_previous_crohn___9 == 1 # uppergi
#     )
#   ) %>%
#   select(
#     study_id,
#     location_previous_crohn___1, # terminal ileum
#     location_previous_crohn___2, # ileum
#     location_previous_crohn___3, # jejenum
#     location_previous_crohn___4, # right colon
#     location_previous_crohn___5, # transverse colon
#     location_previous_crohn___6, # left colon
#     location_previous_crohn___7, # rectum
#     location_previous_crohn___8, # perianal
#     location_previous_crohn___9, # uppergi
#     location_previous_crohn___10, # no previous operations
#   )
#   study_id
#   <chr>
# 1 2022046044 -- fixed
# 2 2871026699 -- fixed

# problematic admiss_date, disch_date, surg_date
# data %>%
#   filter(
#     difftime(as.Date(disch_date), as.Date(surg_date), units = "days") < 0 |
#       difftime(as.Date(surg_date), as.Date(admiss_date), units = "days") < 0 |
#       difftime(as.Date(disch_date), as.Date(admiss_date), units = "days") < 0 |
#       difftime(as.Date(surg_date), as.Date(admiss_date), units = "days") > 100 |
#       difftime(as.Date(disch_date), as.Date(admiss_date)) > 100
#   ) %>%
#   select(study_id, surg_date, admiss_date, disch_date)
## wrong dates
#   study_id     surg_date  admiss_date disch_date
#   <chr>        <date>     <date>      <date>
# 1 2482045216   2022-09-28 2022-09-29  2022-10-03 -- fixed
# 2 201933269    2019-06-27 2019-07-26  2019-07-03 -- fixed
# 3 2482058889   2023-01-04 2023-01-30  2023-01-12 -- fixed
# 4 2082019572   2022-03-17 2022-03-09  2022-03-16 -- fixed
# 5 2402019398   2022-03-11 2022-03-13  2022-03-24 -- fixed
# 6 2561046387   2021-10-28 2021-10-25  2021-10-02 -- fixed
# 7 1263.037.284 2019-10-02 2013-09-30  2013-10-09 -- fixed
# 8 2019041728   2019-08-26 2019-08-28  2019-09-02 -- wrong nosologic code
## long stay
#   study_id     surg_date  admiss_date disch_date
#   <chr>        <date>     <date>      <date>
# 1 2612019003   2022-03-13 2021-03-13  2022-03-21 -- fixed
# 2 2391011962   2021-02-02 2021-01-21  2021-06-04 -- correct
# 3 2010139691   2020-06-08 2020-01-26  2020-06-20 -- fixed
# 4 1263.037.284 2019-10-02 2013-09-30  2013-10-09 -- fixed
# 5 1758.168.350 2019-06-25 2018-06-24  2019-07-01 -- unfixable, all three dates are wrong

# data %>%
#   filter(
#     (dis_beahviour___1 == 1 | dis_beahviour___2 == 1) & dis_beahviour___3 == 1
#   ) %>%
#   select(study_id, dis_beahviour___1, dis_beahviour___2, dis_beahviour___3)
#   study_id   dis_beahviour___1 dis_beahviour___2 dis_beahviour___3
#   <chr>      <dbl+lbl>         <dbl+lbl>         <dbl+lbl>
# 1 2910130481 0 [Unchecked]     1 [Checked]       1 [Checked]

df_fix <- data %>%
  dplyr::mutate(
    age = case_when(
      study_id == 2170023623 ~ 67,
      study_id == 2891035628 ~ 26,
      study_id == 2692028597 ~ 50,
      study_id == 2292024038 ~ 77,
      .default = as.integer(age),
    ),
    alb_surg = case_when(
      study_id == 2191039889 ~ "40.4",
      .default = alb_surg,
    ),
    previous_biologics = case_when(
      study_id == 2340035674 ~ 0,
      .default = previous_biologics,
    ),
    adalim_end = case_when(
      study_id == 2271028772 ~ "16-01-2021",
      study_id == 2391011962 ~ "01-05-2012",
      .default = adalim_end,
    ),
    sex.factor = case_when(
      study_id == 2180024326 ~ "Female",
      study_id == 2971033025 ~ "Male",
      study_id == 1039013496 ~ "Male",
      study_id == 2071039462 ~ "Male",
      study_id == "1399.049.496" ~ "Male",
      .default = sex.factor,
    ),
    admiss_date = case_when(
      study_id == 2482045216 ~ as.Date("2022-09-27"),
      study_id == 2482058889 ~ as.Date("2023-01-03"),
      study_id == 201933269 ~ as.Date("2019-06-26"),
      study_id == 2402019398 ~ as.Date("2022-03-10"),
      study_id == "1263.037.284" ~ as.Date("2019-09-30"),
      study_id == 2612019003 ~ as.Date("2022-03-13"),
      study_id == 2010139691 ~ as.Date("2020-05-19"),
      .default = as.Date(admiss_date),
    ),
    # admiss_date = structure(as.Date(admiss_date), label = "Admission date"),
    disch_date = case_when(
      study_id == "1263.037.284" ~ as.Date("2019-10-09"),
      study_id == 2561046387 ~ as.Date("2021-11-02"),
      study_id == 2082019572 ~ as.Date("2022-03-23"),
      .default = as.Date(disch_date)
    ),
    # disch_date = structure(as.Date(disch_date), label = "Discharge date"),
    # surg_date = structure(as.Date(surg_date), label = "Surgery date"),
    # dis_beahviour___1 = as.factor(dis_beahviour___1),
    # dis_beahviour___2 = as.factor(dis_beahviour___2),
    # dis_beahviour___3 = case_when(
    #   study_id == 2910130481 ~ 0,
    #   .default = as.factor(dis_beahviour___3),
    # ),
    # dis_beahviour___3.factor = case_when(
    #   study_id == 2910130481 ~ "Unchecked",
    #   .default = dis_beahviour___3.factor,
    # ),
    dis_behavior = case_when(
      (dis_beahviour___1 == 1 & dis_beahviour___2 == 0) ~ 0,
      (dis_beahviour___1 == 0 & dis_beahviour___2 == 1) ~ 1, # corrects 2910130481
      (dis_beahviour___1 == 1 & dis_beahviour___2 == 1) ~ 2,
      (dis_beahviour___1 == 0 & dis_beahviour___2 == 0 & dis_beahviour___3 == 1) ~ 3,
      .default = NA,
    ),
    dis_behavior = structure(
      factor(dis_behavior,
        levels = c(0, 1, 2, 3),
        labels = c("Stenotic", "Fistulizing", "Stenotic + fistulizing", "None")
      ),
      label = "Disease behavior"
    ),
    location_previous_crohn___1 = case_when(
      study_id == 2022046044 ~ factor(0, levels = c(0, 1)),
      .default = as.factor(location_previous_crohn___1),
    ),
    location_previous_crohn___5 = case_when(
      study_id == 2871026699 ~ factor(0, levels = c(0, 1)),
      .default = as.factor(location_previous_crohn___5),
    ),
    location_previous_crohn___6 = case_when(
      study_id == 2871026699 ~ factor(0, levels = c(0, 1)),
      .default = as.factor(location_previous_crohn___6),
    ),
    location_previous_crohn___8 = case_when(
      study_id == 2022046044 ~ factor(1, levels = c(0, 1)),
      .default = as.factor(location_previous_crohn___8),
    ),
    location_previous_crohn___10 = case_when(
      study_id == 2022046044 ~ factor(0, levels = c(0, 1)),
      study_id == 2871026699 ~ factor(1, levels = c(0, 1)),
      .default = as.factor(location_previous_crohn___10),
    ),
    location_previous_crohn___2 = as.factor(location_previous_crohn___2),
    location_previous_crohn___3 = case_when(
      study_id == "2551047268" ~ factor(1, levels = c(0, 1)),
      .default = as.factor(location_previous_crohn___3)
    ),
    location_previous_crohn___4 = as.factor(location_previous_crohn___4),
    location_previous_crohn___7 = as.factor(location_previous_crohn___7),
    location_previous_crohn___9 = as.factor(location_previous_crohn___9),
    alb_surg = case_when(
      study_id == "2019041728" ~ "40.4",
      study_id == "2641033696" ~ "43.5",
      study_id == "2391039025" ~ "39.3",
      study_id == "1469044516" ~ "37.7",
      .default = alb_surg,
    ),
  ) %>%
  rename(
    cci_age = age_ce7bf7,
    cci_age.factor = age_ce7bf7.factor,
  )

df_clean <- df_fix %>%
  mutate(
    age = structure(age, label = "Age", units = "years"),
    bmi = structure(bmi, label = "BMI", units = "kg/m\u{00B2}"),
    sex = structure(factor(sex, levels = c(0, 1), labels = c("Female", "Male")), label = "Sex"),
    sex.factor = structure(as.factor(sex.factor), label = "Gender"),
    cci_gt2 = structure(factor(ifelse(cci_total_sc > 2, 1, 0), levels = c(0, 1), labels = c("≤ 2", "> 2")),
      label = "Charlson Comorbidity Index ≤2"
    ),
    cci_total_sc = structure(as.integer(cci_total_sc), label = "Charlson Comorbidity Index score"),
    years_disease = structure(as.integer(years_disease), label = "Disease duration", units = "years"),
    surg_durat = structure(as.integer(surg_durat), label = "Surgery duration", units = "minutes"),
    asa_score = structure(factor(asa_score, levels = c(1:4)), label = "ASA score"),
    dis_behavior = case_when(
      (dis_beahviour___1 == 1 & dis_beahviour___2 == 0) ~ 0,
      (dis_beahviour___1 == 0 & dis_beahviour___2 == 1) ~ 1,
      (dis_beahviour___1 == 1 & dis_beahviour___2 == 1) ~ 2,
      (dis_beahviour___1 == 0 & dis_beahviour___2 == 0 & dis_beahviour___3 == 1) ~ 3,
      .default = NA,
    ),
    dis_behavior = structure(
      factor(dis_behavior,
        levels = c(0, 1, 2, 3),
        labels = c("Stenotic", "Fistulizing", "Stenotic + fistulizing", "None")
      ),
      label = "Disease behavior"
    ),
    recurrence = structure(factor(recurrence, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Recurrence on previously operated site"
    ),
    los_before = structure(as.integer(difftime(surg_date, admiss_date, units = "days")),
      label = "Length of stay before surgery, days"
    ),
    los_after = structure(as.integer(difftime(disch_date, surg_date, units = "days")),
      label = "Length of stay after surgery, days"
    ),
    los_total = structure(as.integer(difftime(disch_date, admiss_date, units = "days")),
      label = "Hospital length of stay, days"
    ),
    laparoscopy = structure(factor(
      laparoscopy,
      levels = c(1, 2, 3, 4, 5, 6, 7),
      labels = c(
        "Laparoscopy",
        "Laparoscopic converted to open",
        "Hand-assisted",
        "Open",
        "Robotic",
        "Robotic converted to laparoscopic",
        "Robotic converted to open"
      )
    )),
    surg_approach = structure(droplevels(laparoscopy), label = "Surgical approach"),
    surg_timing = structure(factor(surg_timing, levels = c(1, 2, 3), labels = c("Elective", "Urgent", "Emergency")),
      label = "Surgical timing"
    ), # very few urgent and emergent...
    surg_itu = structure(
      factor(surg_itu,
        levels = c(1, 2, 3),
        labels = c("No", "Postop ICU scheduled", "Postop ICU unexpected")
      ),
      label = "Intesive Care admission after surgery"
    ),
    surg_disease_multi = case_when(
      rowSums(across(starts_with("location_disease___"))) >= 2 ~ 1,
      rowSums(across(starts_with("location_disease___"))) == 1 ~ 0,
      rowSums(across(starts_with("location_disease___"))) == 0 ~ NA, # surgery for stoma closure usually
    ),
    surg_disease_multi = structure(factor(surg_disease_multi, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Multiple localizations of disease requiring surgery"
    ),
    location_disease___1 = structure(factor(location_disease___1, levels = c(0, 1)),
      label = "terminal ileum"
    ),
    location_disease___2 = structure(factor(location_disease___2, levels = c(0, 1)),
      label = "ileum"
    ),
    location_disease___3 = structure(factor(location_disease___3, levels = c(0, 1)),
      label = "jejunum"
    ),
    location_disease___4 = structure(factor(location_disease___4, levels = c(0, 1)),
      label = "right colon"
    ),
    location_disease___5 = structure(factor(location_disease___5, levels = c(0, 1)),
      label = "transverse colon"
    ),
    location_disease___6 = structure(factor(location_disease___6, levels = c(0, 1)),
      label = "left colon"
    ),
    location_disease___7 = structure(factor(location_disease___7, levels = c(0, 1)),
      label = "rectum"
    ),
    location_disease___8 = structure(factor(location_disease___8, levels = c(0, 1)),
      label = "perianal"
    ),
    location_disease___9 = structure(factor(location_disease___9, levels = c(0, 1)),
      label = "upper GI including duodenum"
    ),
    resection = ifelse(termileum_surg___1 == 1 | ileum_surg___1 == 1 | jej_surg___1 == 1 |
      colon_surg___1 == 1 | ugi_surg___1 == 1, 1, 0),
    stricturoplasty = ifelse(termileum_surg___2 == 1 | ileum_surg___2 == 1 | jej_surg___2 == 1 |
      colon_surg___2 == 1 | ugi_surg___2 == 1, 1, 0),
    fistulectomy = ifelse(ileum_surg___4 == 1 | jej_surg___4 == 1 | colon_surg___4 == 1 | ugi_surg___4 == 1, 1, 0),
    perianal_surg = structure(factor(perianal_surg, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Perianal surgery"
    ),
    stoma = structure(factor(stoma_typ, levels = c(0:6), labels = c(
      "No stoma",
      "Ileostomia loop",
      "Ileostomia terminale",
      "Ileostomia loop-end",
      "Ileostomia a canna di fucile",
      "Colostomia loop",
      "Colostomia terminale"
    )), label = "Type of stoma"),
    surg_procedure = structure(paste(
      ifelse(termileum_surg___3, "resezione ileocecale", NA_character_),
      ifelse(resection, "resezione non ileocecale", NA_character_),
      ifelse(stricturoplasty, "stricturoplastica", NA_character_),
      ifelse(fistulectomy, "fistulectomia", NA_character_),
      ifelse(perianal_surg == "Yes", "chirurgia perianale", NA_character_),
      case_when(
        stoma == "No stoma" ~ NA_character_,
        grepl("terminale", stoma) ~ "stomia terminale",
        grepl("loop|fucile", stoma) ~ "stomia loop",
      ),
      ifelse(termileum_surg___4 == 1 | ileum_surg___3 == 1 | jej_surg___3 == 1 |
        colon_surg___3 == 1 | colon_surg___5 == 1 | ugi_surg___3 == 1, "altre procedure", NA_character_),
      sep = " + "
    ), label = "Surgical procedure"),
    surg_procedure = glue::trim(gsub(" \\+ NA|NA \\+ ", "", surg_procedure)),
    surg_procedure_simple = case_when(
      termileum_surg___3 == 1 ~ 4,
      resection == 1 ~ 3,
      stricturoplasty == 1 | fistulectomy == 1 |
        ((termileum_surg___3 == 0 & resection == 0 | stricturoplasty == 0 & fistulectomy == 0) & perianal_surg == "Yes") ~ 2,
      stoma_surg_close == 1 ~ 1,
      termileum_surg___4 == 1 | ileum_surg___3 == 1 | jej_surg___3 == 1 |
        colon_surg___3 == 1 | colon_surg___5 == 1 | ugi_surg___3 | stoma != "No stoma" ~ 0,
      .default = NA,
    ),
    surg_procedure_simple = structure(factor(surg_procedure_simple, levels = c(0:4), labels = c(
      "Other procedures",
      "Stoma closure",
      "Stricturoplasty, fistulectomy or perianal surgery",
      "Resection (non ileocecal)",
      "Resection ileocecal"
    )), label = "Surgical procedure (simple)"),
    perforation = factor(perforation, levels = c(0, 1), labels = c("No", "Yes")),
    abscess = structure(factor(abscess, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Abscess at surgery"
    ),
    abscess_multi = case_when(
      is.na(abscess) ~ NA,
      rowSums(across(starts_with("abscess_location___"))) >= 2 ~ 2,
      rowSums(across(starts_with("abscess_location___"))) == 1 ~ 1,
      rowSums(across(starts_with("abscess_location___"))) == 0 ~ 0,
      .default = NA,
    ),
    abscess_multi = structure(
      factor(abscess_multi,
        levels = c(0:2),
        labels = c("No abscess", "Single abscess", "Multiple abscesses")
      ),
      label = "Abscesses at surgery"
    ),
    abscess_location = structure(paste(
      ifelse(abscess_location___1, sub(".*choice=(.*)\\)", "\\1", label(abscess_location___1)), NA_character_), # abdominal wall
      ifelse(abscess_location___2, sub(".*choice=(.*)\\)", "\\1", label(abscess_location___2)), NA_character_), # mesentery
      ifelse(abscess_location___3, sub(".*choice=(.*)\\)", "\\1", label(abscess_location___3)), NA_character_), # intraparietal
      ifelse(abscess_location___4, sub(".*choice=(.*)\\)", "\\1", label(abscess_location___4)), NA_character_), # pelvic
      ifelse(abscess_location___5, sub(".*choice=(.*)\\)", "\\1", label(abscess_location___5)), NA_character_), # retroperitoneal
      ifelse(abscess_location___6, sub(".*choice=(.*)\\)", "\\1", label(abscess_location___6)), NA_character_), # other
      sep = " + "
    ), label = "Location of abscesses"),
    abscess_location = glue::trim(gsub("( \\+ )?NA?( \\+ )?", "", abscess_location)),
    training = structure(factor(training, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Training surgery (trainee performing part of all operation under supervision)"
    ),
    po_death = structure(factor(po_death, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Death within 30 days from surgery"
    ),
    surg_date = structure(as.Date(surg_date), label = "Surgery date"),
    po_inf_when = structure(as.Date(po_inf_when), label = "Date of post-surgery infection diagnosis"),
    # variable for time-to-infection in days
    po_inf_time = structure(difftime(po_inf_when, surg_date, units = "days"), label = "Time from surgery to first infection"),
    # variable for infection within 30d post surgery
    po_inf_30d = case_when(
      (po_inf_time >= 0 & po_inf_time <= 30) ~ 1,
      # is.na(po_inf_yn) ~ NA,
      .default = 0,
    ),
    po_inf_30d = structure(
      factor(po_inf_30d, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Any post-surgery infection within 30 days"
    ),
    # new variable considering any case with previous major abdominal surgery
    prev_surg_yn = case_when(
      if_any(
        c(
          prev_abd_surg, # Previous abdominal surgery (EXCLUDING procedures for Crohn's)
          prev_abd_surg_typ___3, # gynecologic procedure (including C section)
          prev_abd_surg_typ___4, # gastric surgery
          prev_abd_surg_typ___5, # colon surgery
          prev_abd_surg_typ___6, # hepatic surgery
        ), ~ .x == 1
      ) ~ 1,
      if_any(matches("location_previous_crohn___[1-9]$"), ~ . == 1) ~ 1,
      location_previous_crohn___10 == 1 ~ 0, # no previous Crohn surgery
      if_all(
        c(
          prev_abd_surg, # Previous abdominal surgery (EXCLUDING procedures for Crohn's)
          prev_abd_surg_typ___3, # gynecologic procedure (including C section)
          prev_abd_surg_typ___4, # gastric surgery
          prev_abd_surg_typ___5, # colon surgery
          prev_abd_surg_typ___6, # hepatic surgery
          matches("location_previous_crohn___[1-9]$") # any Crohn surgery excluding 10 (none)
        ), ~ .x == 0,
      ) ~ 0,
      .default = 0,
    ),
    prev_surg_yn = structure(
      factor(prev_surg_yn, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Any previous major abdominal surgery"
    ),
    pre_op_atb_yn = structure(factor(pre_op_atb_yn, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Antibiotics within 4 weeks before surgery"
    ),
    # steroid_surg_yn = case_when(
    #   (steroids_ongoing == 1 | ster_surg == 1) ~ 1, # steroids ongoing at time of surgery (from IBD or Medications)
    #   steroids_ongoing == 1 & if_any(matches("steroid_type___[1-4]"), ~ . == 1) ~ 1, # any type of steroids (from IBD medications) excluding topical
    #   (steroids_ongoing == 0 | ster_surg == 0) ~ 0,
    #   if_all(starts_with("steroid_type___"), ~ . == 0) ~ 0,
    #   .default = NA,
    # ),
    steroid_surg_yn = case_when(
      steroids_ongoing == 1 & (
        difftime(as.Date(surg_date), as.Date(steroid_start), units = c("days")) >= 14 |
          difftime(as.Date(steroid_end), as.Date(steroid_start), units = c("days")) >= 14 |
          is.na(steroid_start) | steroid_start == ""
      ) ~ 1,
      steroids_ongoing == 0 | (steroids_ongoing == 1 & (
        difftime(as.Date(surg_date), as.Date(steroid_start), units = c("days")) < 14 |
          difftime(as.Date(steroid_end), as.Date(steroid_start), units = c("days")) < 14
      )) ~ 0,
      # steroid_suspended4w == 1 & (
      # difftime(as.Date(surg_date), as.Date(steroid_start), units = c("days")) >= 14 |
      # difftime(as.Date(steroid_end), as.Date(steroid_start), units = c("days")) >= 14) ~ 1,
      .default = NA,
    ),
    steroid_surg_yn = structure(factor(steroid_surg_yn, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Any oral steroids ongoing at surgery"
    ),
    steroid_surg_dose = case_when(
      # ster_surg == 1 & ster_surg_dose == 1 ~ 3, # prednisone > 20 mg/die or equivalent (from Medications) -- checked
      # ster_surg == 1 & ster_surg_dose == 0 ~ 2, # prednisone < 20 mg/die or equivalent (from Medications) -- checked
      # ster_surg == 1 & is.na(ster_surg_dose) ~ 1, # steroids ongoing without dosage (from Medications) -- checked
      # steroids_ongoing == 1 required as type includes stopped within 4 and 8 weeks of surgery
      steroid_surg_yn == "Yes" & steroid_type___1 == 1 ~ 3, # prednisone > 20 mg/die or equivalent (from IBD medications)
      steroid_surg_yn == "Yes" & steroid_type___2 == 1 ~ 2, # prednisone < 20 mg/die or equivalent (from IBD medications)
      # Beclomethasone group is empty
      steroid_surg_yn == "Yes" & (steroid_type___3 == 1 | steroid_type___4 == 1) ~ 1, # beclomethasone or budesonide (from IBD medications)
      # ignoring steroid_type___5 -- topical steroid
      steroid_surg_yn == "No" ~ 0, # no steroids
      .default = NA,
    ),
    steroid_surg_dose = structure(factor(
      steroid_surg_dose,
      levels = c(0, 1, 2, 3),
      labels = c("No steroids", "Other steroid", "Prednisone < 20 mg/day", "Prednisone > 20 mg/day")
    ), label = "Dose of oral steroids ongoing at surgery"),
    # steroid_surg_time = as.integer(ster_surg_time),
    inflix_start_weeks = structure(round(difftime(as.Date(surg_date), as.Date(inflix_start), units = c("weeks"))),
      label = "Weeks from start of infliximab to surgery"
    ),
    adalim_start_weeks = structure(round(difftime(as.Date(surg_date), as.Date(adalim_start), units = c("weeks"))),
      label = "Weeks from start of adalimumab to surgery"
    ),
    vedoli_start_weeks = structure(round(difftime(as.Date(surg_date), as.Date(vedo_start), units = c("weeks"))),
      label = "Weeks from start of vedolizumab to surgery"
    ),
    usteki_start_weeks = structure(round(difftime(as.Date(surg_date), as.Date(ustek_start), units = c("weeks"))),
      label = "Weeks from start of ustekinumab to surgery"
    ),
    inflix_end_weeks = structure(round(difftime(as.Date(surg_date), as.Date(inflix_end), units = c("weeks"))),
      label = "Weeks from last infliximab dose to surgery"
    ),
    adalim_end_weeks = structure(round(difftime(as.Date(surg_date), as.Date(adalim_end), units = c("weeks"))),
      label = "Weeks from last adalimumab dose to surgery"
    ),
    vedoli_end_weeks = structure(round(difftime(as.Date(surg_date), as.Date(vedo_end), units = c("weeks"))),
      label = "Weeks from last vedolizumab dose to surgery"
    ),
    usteki_end_weeks = structure(round(difftime(as.Date(surg_date), as.Date(ustek_end), units = c("weeks"))),
      label = "Weeks from last ustekinumab dose to surgery"
    ),
    # only include last dose of biologics from 24 to 8 weeks (6-2 months) before surgery vs within 8 weeks (2 months) from surgery
    # biologic_surg_8w = case_when(
    #   ((biologic == 1 & (preop_bio_current == 0 & preop_bio_current_weekstop <= 8) | preop_bio_current_weekstop <= 8) |
    #      (inflix_end_weeks <= 8 | preop_ifx_current_weekstop <= 8) |
    #      (adalim_end_weeks <= 8 | preop_ada_current_weekstop <= 8) |
    #      (vedoli_end_weeks <= 8 | preop_ved_current_weekstop <= 8) |
    #      (usteki_end_weeks <= 8 | preop_ust_current_weekstop <= 8)) ~ 1,
    #   ((biologic == 1 & (preop_bio_current_weekstop > 8 & preop_bio_current_weekstop <= 24)) |
    #      ((inflix_end_weeks > 8 & inflix_end_weeks <= 24) | (preop_ifx_current_weekstop > 8 & preop_ifx_current_weekstop <= 24)) |
    #      ((adalim_end_weeks > 8 & adalim_end_weeks <= 24) | (preop_ada_current_weekstop > 8 & preop_ada_current_weekstop <= 24)) |
    #      ((vedoli_end_weeks > 8 & vedoli_end_weeks <= 24) | (preop_ved_current_weekstop > 8 & preop_ada_current_weekstop <= 24)) |
    #      ((usteki_end_weeks > 8 & usteki_end_weeks <= 24) | (preop_ust_current_weekstop > 8 & preop_ust_current_weekstop <= 24))) ~ 0,
    #   .default = NA
    # # ),
    # biologic_surg_8w = structure(factor(biologic_surg_8w, levels = c(0, 1), labels = c("No", "Yes")),
    #                              label = "Last dose of any biologic within 8 weeks from surgery (vs last dose 2-6 months before)"),
    across(matches("(inflix|adalim|vedo|ustek)_(start|end)$"), as.Date),
    biologic_surg_8w_yn = case_when(
      # ((biologic == 1 & (preop_bio_current == 0 & preop_bio_current_weekstop <= 8) | preop_bio_current_weekstop <= 8) |
      #    (preop_ifx_current_weekstop <= 8 | preop_ada_current_weekstop <= 8 |
      #     preop_ved_current_weekstop <= 8) | preop_ust_current_weekstop <= 8)) ~ 1,
      # biologic == 0 ~ 0,
      # preop_bio_current_weekstop > 8 |
      #   preop_ifx_current_weekstop > 8 |
      #   preop_ada_current_weekstop > 8 |
      #   preop_ved_current_weekstop > 8 |
      #   preop_ust_current_weekstop > 8 ~ 0,
      previous_biologics == 0 ~ 0,
      (inflix_end_weeks <= 8 | (inflix_start_weeks >= 0 & is.na(inflix_end_weeks))) |
        (adalim_end_weeks <= 8 | (adalim_start_weeks >= 0 & is.na(adalim_end_weeks))) |
        (vedoli_end_weeks <= 8 | (vedoli_start_weeks >= 0 & is.na(vedoli_end_weeks))) |
        (usteki_end_weeks <= 8 | (usteki_start_weeks >= 0 & is.na(usteki_end_weeks))) ~ 1,
      # (is.na(inflix_end_weeks) & if_all(starts_with("discont_inflix___"), ~ .x == 0)))) |
      #   (is.na(adalim_end_weeks) & if_all(starts_with("discont_adalim___"), ~ .x == 0)))) |
      #   (is.na(vedoli_end_weeks) & if_all(starts_with("discont_vedoliz___"), ~ .x == 0)))) |
      #   (is.na(usteki_end_weeks) & if_all(starts_with("discont_ustek___"), ~ .x == 0)))) ~ 1,
      inflix_start_weeks < 0 | adalim_start_weeks < 0 | vedoli_start_weeks < 0 | usteki_start_weeks < 0 ~ 0,
      inflix_end_weeks > 8 | adalim_end_weeks > 8 | vedoli_end_weeks > 8 | usteki_end_weeks > 8 ~ 0,
      # (is.na(inflix_end) & if_any(starts_with("discont_inflix___"), ~ .x == 1)) |
      #   (is.na(adalim_end) & if_any(starts_with("discont_adalim___"), ~ .x == 1)) |
      #   (is.na(vedo_end) & if_any(starts_with("discont_vedoliz___"), ~ .x == 1)) |
      #   (is.na(ustek_end) & if_any(starts_with("discont_ustek___"), ~ .x == 1)) ~ 0,
      # (biologic == 1 & preop_bio_current == 1) |
      #   (preop_multi_bio_yn == 1 & if_all(matches("multi_(ifx|ada|ved|ust)_current"), ~ is.na(.x) | .x == 1)) ~ 0,
      .default = NA,
    ),
    biologic_surg_8w_yn = structure(factor(biologic_surg_8w_yn, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Exposed to biologics within 8 weeks from surgery vs more or never"
    ),
    infliximab_ongoing = structure(factor(case_when(
      # previous_biologics == 0 ~ 0,
      # biologic_speficy___1 == 0 ~ 0,
      inflix_start_weeks < 0 | inflix_end_weeks > 8 ~ 0,
      inflix_end_weeks <= 8 ~ 1,
      !(is.na(inflix_start)) & is.na(inflix_end) & if_all(starts_with("discont_inflix___"), ~ .x == 0) ~ 1,
      .default = NA,
    )), label = "Infliximab within 8 weeks of surgery vs more"),
    adalimumab_ongoing = structure(factor(case_when(
      # previous_biologics == 0 ~ 0,
      # biologic_specify___2 == 0 ~ 0,
      adalim_start_weeks < 0 | adalim_end_weeks > 8 ~ 0,
      adalim_end_weeks <= 8 ~ 1,
      !(is.na(adalim_start)) & is.na(adalim_end) & if_all(starts_with("discont_adalim___"), ~ .x == 0) ~ 1,
      .default = NA,
    )), label = "Adalimumab within 8 weeks of surgery vs more"),
    vedolizumab_ongoing = structure(factor(case_when(
      # previous_biologics == 0 ~ 0,
      # biologic_specify___3 == 0 ~ 0,
      vedoli_start_weeks < 0 | vedoli_end_weeks > 8 ~ 0,
      vedoli_end_weeks <= 8 ~ 1,
      !(is.na(vedo_start)) & is.na(vedo_end) & if_all(starts_with("discont_vedoliz___"), ~ .x == 0) ~ 1,
      .default = NA,
    )), label = "Vedolizumab within 8 weeks of surgery vs more"),
    ustekinumab_ongoing = structure(factor(case_when(
      # previous_biologics == 0 ~ 0,
      # biologic_specify___3 == 0 ~ 0,
      usteki_start_weeks < 0 | usteki_end_weeks > 8 ~ 0,
      usteki_end_weeks <= 8 ~ 1,
      !(is.na(ustek_start)) & is.na(ustek_end) & if_all(starts_with("discont_ustek___"), ~ .x == 0) ~ 1,
      .default = NA,
    )), label = "Ustekinumab within 8 weeks of surgery vs more"),
    biologic_surg_8w_new = structure(factor(case_when(
      previous_biologics == 0 ~ 0,
      if_any(matches("(infliximab|adalimumab|ustekinumab|vedolizumab)_ongoing"), ~ .x == 1) ~ 1,
      if_all(matches("(infliximab|adalimumab|ustekinumab|vedolizumab)_ongoing"), ~ is.na(.x)) ~ NA,
      if_all(matches("(infliximab|adalimumab|ustekinumab|vedolizumab)_ongoing"), ~ .x == 0 | is.na(.x)) ~ 0,
      .default = NA,
    ), levels = c(0:1), labels = c("No", "Yes")), label = "Exposed to biologics within 8 weeks from surgery vs all others"),
    # aza_end_weeks = structure(difftime(as.Date(aza_end), as.Date(surg_date), units = c("weeks")), label = "Weeks from last azathioprine dose to surgery"),
    # aza_start_weeks = structure(difftime(as.Date(aza_start), as.Date(surg_date), units = c("weeks")), label = "Weeks from first azathioprine dose to surgery"),
    # aza_surg_yn = case_when(
    #   (aza_surg == 1 | aza_prev_stop == 0 | (aza_end_weeks >= 0 & aza_end_weeks <= 16) | aza_start_weeks >= 12) ~ 1,
    #   (aza_surg == 0 | aza_prev_stop == 1 | aza_end_weeks > 16 | aza_end_weeks < 0 | aza_start_weeks < 12) ~ 0,
    #   .default = NA
    # ),
    # aza_surg_yn = structure(factor(aza_surg_yn, levels = c(0, 1), labels = c("No", "Yes")),
    #                         label = "Last 5-ASA within 4 months from surgery"),
    inflix_surg_8w = case_when(
      inflix_start_weeks >= 0 & (inflix_end_weeks <= 8 |
        (is.na(inflix_end_weeks) & if_all(starts_with("discont_inflix___"), ~ .x == 0))) ~ 1,
      is.na(inflix_start_weeks) & inflix_end_weeks <= 8 ~ 1,
      inflix_end_weeks > 8 & inflix_end_weeks <= 24 ~ 0,
      # preop_bio_typ == 0 & preop_bio_current_weekstop <= 8 ~ 1,
      # preop_ifx_current_weekstop <= 8 ~ 1,
      # preop_bio_typ == 0 & preop_bio_current_weekstop > 8 & preop_bio_current_weekstop <= 24 ~ 0,
      # preop_ifx_current_weekstop > 8 & preop_ifx_current_weekstop <= 24 ~ 0,
      .default = NA,
    ),
    inflix_surg_8w = structure(factor(inflix_surg_8w, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Infliximab within 8 weeks from surgery (vs last dose 2-6 months before)"
    ),
    adalim_surg_8w = case_when(
      adalim_start_weeks >= 0 & (adalim_end_weeks <= 8 |
        (is.na(adalim_end_weeks) & if_all(starts_with("discont_adalim___"), ~ .x == 0))) ~ 1,
      is.na(adalim_start_weeks) & adalim_end_weeks <= 8 ~ 1,
      adalim_end_weeks > 8 & adalim_end_weeks <= 24 ~ 0,
      # preop_bio_typ == 1 & preop_bio_current_weekstop <= 8 ~ 1,
      # preop_ada_current_weekstop <= 8 ~ 1,
      # preop_bio_typ == 1 & preop_bio_current_weekstop > 8 & preop_bio_current_weekstop <= 24 ~ 0,
      # preop_ada_current_weekstop > 8 & preop_ifx_current_weekstop <= 24 ~ 0,
      .default = NA,
    ),
    adalim_surg_8w = structure(factor(adalim_surg_8w, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Adalimumab within 8 weeks from surgery (vs last dose 2-6 months before)"
    ),
    vedoli_surg_8w = case_when(
      vedoli_start_weeks >= 0 & (vedoli_end_weeks <= 8 |
        (is.na(vedoli_end_weeks) & if_all(starts_with("discont_ustek___"), ~ .x == 0))) ~ 1,
      is.na(vedoli_start_weeks) & vedoli_end_weeks <= 8 ~ 1,
      vedoli_end_weeks > 8 & vedoli_end_weeks <= 24 ~ 0,
      # preop_bio_typ == 2 & preop_bio_current_weekstop <= 8 ~ 1,
      # preop_ved_current_weekstop <= 8 ~ 1,
      # preop_bio_typ == 2 & preop_bio_current_weekstop > 8 & preop_bio_current_weekstop <= 24 ~ 0,
      # preop_ved_current_weekstop > 8 & preop_ved_current_weekstop <= 24 ~ 0,
      .default = NA,
    ),
    vedoli_surg_8w = structure(factor(vedoli_surg_8w, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Vedolizumab within 8 weeks from surgery (vs last dose 2-6 months before)"
    ),
    usteki_surg_8w = case_when(
      usteki_start_weeks >= 0 & (usteki_end_weeks <= 8 |
        (is.na(usteki_end_weeks) & if_all(starts_with("discont_vedoliz___"), ~ .x == 0))) ~ 1,
      is.na(usteki_start_weeks) & usteki_end_weeks <= 8 ~ 1,
      usteki_end_weeks > 8 & usteki_end_weeks <= 24 ~ 0,
      # preop_bio_typ == 3 & preop_bio_current_weekstop <= 8 ~ 1,
      # preop_ust_current_weekstop <= 8 ~ 1,
      # preop_bio_typ == 3 & preop_bio_current_weekstop > 8 & preop_bio_current_weekstop <= 24 ~ 0,
      # preop_ust_current_weekstop > 8 & preop_ust_current_weekstop <= 24 ~ 0,
      .default = NA,
    ),
    usteki_surg_8w = structure(factor(usteki_surg_8w, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Ustekinumab within 8 weeks from surgery (vs last dose 2-6 months before)"
    ),
    biologics_num = case_when(
      previous_biologics == 1 ~ rowSums(across(starts_with("biologic_specify___"), ~ as.integer(.x))),
      # biologic == 1 & preop_multi_bio_yn == 0 ~ 1,
      # biologic == 1 & preop_multi_bio_yn == 1 ~ 1 + rowSums(across(starts_with("preop_multi_bio_typ___"), ~ as.integer(.x))),
      previous_biologics == 0 ~ 0, # | biologic == 0 ~ 0,
      .default = NA,
    ),
    biologics_num = structure(factor(biologics_num, levels = c(0:4), labels = c("0", "1", "2", "3", "4")),
      label = "Number of biologics before surgery"
    ),
    previous_biologics = structure(factor(previous_biologics, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Any biologic in patient history"
    ),
    alb_surg = sub(",", ".", alb_surg),
    alb_surg = structure(ifelse(as.double(alb_surg) > 10, as.double(alb_surg) / 10, as.double(alb_surg)),
      label = "Albumin at surgery", units = "g/dL"
    ),
  ) %>%
  filter(
    age >= 18 &
      !is.na(po_inf_yn) &
      study_id != "2019041728" & # patient data no longer available
      study_id != "2071024413" & # chirurgia oncologica
      los_total >= 0 & los_total < 200 & # 2110024554 for missing dimiss_date
      los_before >= 0 & los_after >= 0
  )

Missing data for:

  • 2811111878

Data checks

Graphing done with ggplot2 [1], gridExtra [2] and cowplot [3].

Code
age_qq <- ggplot2::ggplot(df_clean, aes(sample = age)) +
  qqplotr::stat_qq_band(fill = "#E69F00", alpha = 0.25) +
  qqplotr::stat_qq_line(color = "#E69F00") +
  qqplotr::stat_qq_point() +
  labs(
    title = "Normal Q-Q Plot for Age",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles",
  ) +
  theme_bw()

bmi_qq <- ggplot2::ggplot(df_clean, aes(sample = bmi)) +
  qqplotr::stat_qq_band(fill = "#56B4E9", alpha = 0.25) +
  qqplotr::stat_qq_line(color = "#56B4E9") +
  qqplotr::stat_qq_point() +
  labs(
    title = "Normal Q-Q Plot for BMI",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles",
  ) +
  theme_bw()

alb_qq <- ggplot2::ggplot(df_clean, aes(sample = alb_surg)) +
  qqplotr::stat_qq_band(fill = "#009E73", alpha = 0.25) +
  qqplotr::stat_qq_line(color = "#009E73") +
  qqplotr::stat_qq_point() +
  labs(
    title = "Normal Q-Q Plot for albumine at surgery",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles",
  ) +
  theme_bw()

yrd_qq <- ggplot2::ggplot(df_clean, aes(sample = years_disease)) +
  qqplotr::stat_qq_band(fill = "#F0E442", alpha = 0.25) +
  qqplotr::stat_qq_line(color = "#F0E442") +
  qqplotr::stat_qq_point() +
  labs(
    title = "Normal Q-Q Plot for years of disease",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles",
  ) +
  theme_bw()

srd_qq <- ggplot2::ggplot(df_clean, aes(sample = surg_durat)) +
  qqplotr::stat_qq_band(fill = "#0072B2", alpha = 0.25) +
  qqplotr::stat_qq_line(color = "#0072B2") +
  qqplotr::stat_qq_point() +
  labs(
    title = "Normal Q-Q Plot for surgery duration",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles",
  ) +
  theme_bw()

gridExtra::grid.arrange(age_qq, bmi_qq, alb_qq, yrd_qq, srd_qq, ncol = 3, nrow = 2)

Code
age_den <- ggplot(df_clean, aes(x = age)) +
  geom_density(fill = "#E69F00", alpha = 0.25) +
  geom_histogram(aes(y = after_stat(density)), bins = 20, alpha = 0.25) +
  stat_function(
    fun = stats::dnorm,
    args = list(
      mean = mean(df_clean$age),
      sd = sd(df_clean$age)
    ),
    color = "#E69F00",
    linewidth = 1,
  ) +
  labs(title = "Density Plot for Age") +
  theme_bw()

bmi_den <- ggplot(df_clean, aes(x = bmi)) +
  geom_density(fill = "#56B4E9", alpha = 0.25) +
  geom_histogram(aes(y = after_stat(density)), bins = 20, alpha = 0.25) +
  stat_function(
    fun = stats::dnorm,
    args = list(
      mean = mean(df_clean$bmi, na.rm = TRUE),
      sd = sd(df_clean$bmi, na.rm = TRUE)
    ),
    color = "#56B4E9",
    linewidth = 1,
  ) +
  labs(title = "Density Plot for BMI") +
  theme_bw()

alb_den <- ggplot(df_clean, aes(x = alb_surg)) +
  geom_density(fill = "#009E73", alpha = 0.25) +
  geom_histogram(aes(y = after_stat(density)), bins = 20, alpha = 0.25) +
  stat_function(
    fun = stats::dnorm,
    args = list(
      mean = mean(df_clean$alb_surg, na.rm = TRUE),
      sd = sd(df_clean$alb_surg, na.rm = TRUE)
    ),
    color = "#009E73",
    linewidth = 1,
  ) +
  labs(title = "Density Plot for albumine at surgery") +
  theme_bw()

yrd_den <- ggplot(df_clean, aes(x = years_disease)) +
  geom_density(fill = "#F0E442", alpha = 0.25) +
  geom_histogram(aes(y = after_stat(density)), bins = 20, alpha = 0.25) +
  stat_function(
    fun = stats::dnorm,
    args = list(
      mean = mean(df_clean$years_disease, na.rm = TRUE),
      sd = sd(df_clean$years_disease, na.rm = TRUE)
    ),
    color = "#F0E442",
    size = 1,
  ) +
  labs(title = "Density Plot for years of disease") +
  theme_bw()

srd_den <- ggplot(df_clean, aes(x = surg_durat)) +
  geom_density(fill = "#0072B2", alpha = 0.25) +
  geom_histogram(aes(y = after_stat(density)), bins = 20, alpha = 0.25) +
  stat_function(
    fun = stats::dnorm,
    args = list(
      mean = mean(df_clean$surg_durat, na.rm = TRUE),
      sd = sd(df_clean$surg_durat, na.rm = TRUE)
    ),
    color = "#0072B2",
    linewidth = 1,
  ) +
  labs(title = "Density Plot for surgery_duration") +
  theme_bw()

cowplot::plot_grid(age_den, bmi_den, alb_den, yrd_den, srd_den, nrow = 3, ncol = 2)

Code
df_clean %>%
  rstatix::shapiro_test(age, bmi, alb_surg, years_disease, surg_durat) %>%
  gt::gt() %>%
  gt::tab_header(
    title = gt::md("Shapiro-Wilk test for normality for **age**, **BMI**, **albumine**, **years of disease**, **surgery duration**"),
  )
Shapiro-Wilk test for normality for age, BMI, albumine, years of disease, surgery duration
variable statistic p
age 0.9644542 1.922530e-09
alb_surg 0.9694311 8.267980e-08
bmi 0.9027610 8.395013e-17
surg_durat 0.9762198 1.369909e-06
years_disease 0.9290085 4.807322e-14
Code
#> gt::tab_caption(caption = gt::md("Shapiro-Wilk test for normality for **age** and **BMI**"))

Description

Visualizations and tables using gtsummary package [4].

Code
theme_gtsummary_compact()
df_small <- df_clean %>%
  select(
    "age",
    "sex",
    "bmi",
    starts_with("cci"),
    starts_with("dis_be"),
    starts_with("location_previous_crohn___"),
    starts_with("prev_abd_surg"),
    "years_disease",
    "asa_score",
    "admiss_date",
    "disch_date",
    "surg_date",
    starts_with("inflix_"),
    starts_with("adalim_"),
    starts_with("vedo"),
    starts_with("uste"),
    "pre_op_atb_yn",
    starts_with("abscess"),
    "perforation",
    starts_with("location_disease___"),
    "surg_approach",
    "surg_timing",
    "surg_durat",
    "surg_disease_multi",
    "recurrence",
    "resection",
    "stricturoplasty",
    "fistulectomy",
    "perianal_surg",
    "stoma",
    "surg_procedure",
    "bowel_obstr",
    starts_with("steroid"),
    starts_with("biologic"),
  )

general_vars <- c(
  "age",
  "sex.factor",
  "bmi",
  #> "cci_gt2", -- use continuous
  "cci_total_sc",
  "asa_score",
  "prev_surg_yn",
  "years_disease",
  "dis_behavior",
  "pre_op_atb_yn",
  "surg_timing",
  "surg_approach",
  "surg_procedure_simple",
  "surg_durat",
  "surg_disease_multi",
  "recurrence",
  #> "abscess",
  "abscess_multi",
  "alb_surg"
)
steroid_vars <- c(
  "steroid_surg_yn",
  "steroid_surg_dose"
)
biologic_vars <- c(
  # "biologic_surg_8w_yn",
  "previous_biologics",
  "biologic_surg_8w_new",
  "infliximab_ongoing",
  "adalimumab_ongoing",
  "vedolizumab_ongoing",
  "ustekinumab_ongoing",
  "inflix_surg_8w",
  "adalim_surg_8w",
  "vedoli_surg_8w",
  "usteki_surg_8w",
  "biologics_num"
)

df_fortab_tesi <- df_clean %>%
  select(all_of(general_vars), "po_inf_30d") %>%
  relocate(cci_total_sc, .after = "bmi") %>%
  # relocate(cci_gt2, .after = "bmi") %>%
  relocate(surg_durat, .after = "surg_timing") %>%
  relocate(asa_score, .after = "cci_total_sc") %>%
  relocate(prev_surg_yn, .after = "asa_score") %>%
  relocate(years_disease, .after = "prev_surg_yn") # %>%
# mutate(sex.factor = structure(sex.factor, label = "Gender"))

gt_label_units <- function(data) {
  lapply(data, function(x) {
    label <- attr(x, "label")
    units <- attr(x, "units")
    if (!is.null(units)) {
      paste(label, paste0("(", units, ")"))
    } else {
      label
    }
  })
}

general <- df_fortab_tesi %>%
  tbl_summary(
    label = gt_label_units(.),
    # asa_score = "ASA score",
    # prev_surg_yn = "Any previous major abdominal surgery",
    # years_disease = "Years since diagnosis at the time of surgery",
    # dis_behavior = "Disease behaviour",
    # pre_op_atb_yn = "Antibiotics within 4 weeks before surgery",
    # # training = "Training surgery",
    # surg_timing = "Surgery timing",
    # surg_approach = "Surgical approach",
    # surg_durat = "Surgery duration, minutes",
    # surg_disease_multi = "Multiple localizations of disease requiring surgery",
    # recurrence = "Surgery involving previoulsy operated site for recurrence of disease"
    # ),
    data = ., by = po_inf_30d,
    missing_text = "Missing",
    missing = "ifany",
    type = list(cci_total_sc ~ "continuous2", all_continuous() ~ "continuous2"),
    statistic = list(
      all_continuous() ~ c(
        "{median} ({p25}, {p75})",
        "{min}, {max}"
      ),
      all_categorical() ~ "{n} / {N} ({p}%)"
    ),
  ) %>%
  add_p(
    test.args = all_tests("chisq.test") ~ list(correct = TRUE),
    pvalue_fun = label_style_pvalue(digits = 3)
  ) %>%
  separate_p_footnotes() %>%
  bold_p() %>%
  add_overall() %>%
  bold_labels()

corticosteroids <- df_clean %>%
  select(all_of(steroid_vars), "po_inf_30d") %>%
  tbl_summary(
    data = ., by = po_inf_30d, missing_text = "(Missing)",
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} / {N} ({p}%)"
    ),
    missing = "no",
  ) %>%
  add_p(
    test.args = all_tests("chisq.test") ~ list(correct = TRUE),
    pvalue_fun = label_style_pvalue(digits = 3)
  ) %>%
  separate_p_footnotes() %>%
  bold_p() %>%
  add_overall() %>%
  bold_labels()

biologics <- df_clean %>%
  select(all_of(biologic_vars), "po_inf_30d") %>%
  tbl_summary(
    data = ., by = po_inf_30d, missing_text = "(Missing)",
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} / {N} ({p}%)"
    ),
    missing = "no",
  ) %>%
  add_p(
    test.args = all_tests("chisq.test") ~ list(correct = TRUE),
    pvalue_fun = label_style_pvalue(digits = 3)
  ) %>%
  separate_p_footnotes() %>%
  bold_p() %>%
  add_overall() %>%
  bold_labels()

gtsummary::tbl_stack(list(general, corticosteroids, biologics), group_header = c("", "Corticosteroids", "Biologics")) %>%
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Post-surgery infection within 30 days**") %>%
  modify_table_body(filter, !(var_type == "categorical" & row_type == "missing")) %>%
  #> modify_caption("**Table 1. Patient characteristics and univariate analysis**") %>%
  #> modify_caption("<div style='text-align: left; font-weight: bold'>Table 1. Patient characteristics and univariate analysis of risk factors for post-surgical infections</div>") %>%
  gtsummary::as_gt() %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold", align = "center"),
    locations = gt::cells_row_groups(groups = everything())
  ) #> %>%
Characteristic Overall
N = 4851
Post-surgery infection within 30 days
p-value
No
N = 3901
Yes
N = 951
Age (years)


0.0642
    Median (Q1, Q3) 41 (30, 53) 40 (29, 52) 45 (31, 57)
    Min, Max 18, 77 18, 77 18, 77
Gender


0.1393
    Female 201 / 485 (41%) 168 / 390 (43%) 33 / 95 (35%)
    Male 284 / 485 (59%) 222 / 390 (57%) 62 / 95 (65%)
BMI (kg/m²)


0.7752
    Median (Q1, Q3) 21.6 (19.3, 24.2) 21.6 (19.3, 24.2) 21.5 (19.3, 24.1)
    Min, Max 13.9, 52.6 13.9, 52.6 15.0, 33.8
    Missing 12 10 2
Charlson Comorbidity Index score


0.0352
    Median (Q1, Q3) 0 (0, 1) 0 (0, 1) 1 (0, 2)
    Min, Max 0, 8 0, 8 0, 6
ASA score


0.1524
    1 23 / 475 (4.8%) 18 / 383 (4.7%) 5 / 92 (5.4%)
    2 345 / 475 (73%) 286 / 383 (75%) 59 / 92 (64%)
    3 106 / 475 (22%) 78 / 383 (20%) 28 / 92 (30%)
    4 1 / 475 (0.2%) 1 / 383 (0.3%) 0 / 92 (0%)
Any previous major abdominal surgery 377 / 485 (78%) 302 / 390 (77%) 75 / 95 (79%) 0.7513
Disease duration (years)


0.0882
    Median (Q1, Q3) 10 (4, 19) 10 (4, 17) 11 (5, 22)
    Min, Max 0, 46 0, 40 0, 46
    Missing 20 18 2
Disease behavior


0.9623
    Stenotic 219 / 477 (46%) 178 / 384 (46%) 41 / 93 (44%)
    Fistulizing 111 / 477 (23%) 89 / 384 (23%) 22 / 93 (24%)
    Stenotic + fistulizing 97 / 477 (20%) 78 / 384 (20%) 19 / 93 (20%)
    None 50 / 477 (10%) 39 / 384 (10%) 11 / 93 (12%)
Antibiotics within 4 weeks before surgery 70 / 481 (15%) 56 / 389 (14%) 14 / 92 (15%) 0.8413
    Missing 4 1 3
Surgical timing


0.1404
    Elective 469 / 479 (98%) 379 / 385 (98%) 90 / 94 (96%)
    Urgent 9 / 479 (1.9%) 5 / 385 (1.3%) 4 / 94 (4.3%)
    Emergency 1 / 479 (0.2%) 1 / 385 (0.3%) 0 / 94 (0%)
Surgery duration (minutes)


0.0632
    Median (Q1, Q3) 195 (150, 250) 195 (150, 245) 210 (180, 250)
    Min, Max 10, 575 10, 485 16, 575
    Missing 46 40 6
Surgical approach


0.1594
    Laparoscopy 234 / 475 (49%) 196 / 380 (52%) 38 / 95 (40%)
    Laparoscopic converted to open 54 / 475 (11%) 42 / 380 (11%) 12 / 95 (13%)
    Hand-assisted 5 / 475 (1.1%) 5 / 380 (1.3%) 0 / 95 (0%)
    Open 170 / 475 (36%) 126 / 380 (33%) 44 / 95 (46%)
    Robotic 11 / 475 (2.3%) 10 / 380 (2.6%) 1 / 95 (1.1%)
    Robotic converted to open 1 / 475 (0.2%) 1 / 380 (0.3%) 0 / 95 (0%)
Surgical procedure (simple)


0.2924
    Other procedures 6 / 480 (1.3%) 6 / 386 (1.6%) 0 / 94 (0%)
    Stoma closure 17 / 480 (3.5%) 14 / 386 (3.6%) 3 / 94 (3.2%)
    Stricturoplasty, fistulectomy or perianal surgery 29 / 480 (6.0%) 27 / 386 (7.0%) 2 / 94 (2.1%)
    Resection (non ileocecal) 179 / 480 (37%) 139 / 386 (36%) 40 / 94 (43%)
    Resection ileocecal 249 / 480 (52%) 200 / 386 (52%) 49 / 94 (52%)
Multiple localizations of disease requiring surgery 223 / 468 (48%) 177 / 375 (47%) 46 / 93 (49%) 0.6963
    Missing 17 15 2
Recurrence on previously operated site 133 / 484 (27%) 98 / 389 (25%) 35 / 95 (37%) 0.0233
    Missing 1 1 0
Abscesses at surgery


0.5784
    No abscess 375 / 480 (78%) 304 / 387 (79%) 71 / 93 (76%)
    Single abscess 87 / 480 (18%) 70 / 387 (18%) 17 / 93 (18%)
    Multiple abscesses 18 / 480 (3.8%) 13 / 387 (3.4%) 5 / 93 (5.4%)
Albumin at surgery (g/dL)


0.3312
    Median (Q1, Q3) 3.80 (3.50, 4.10) 3.80 (3.50, 4.10) 3.80 (3.50, 4.00)
    Min, Max 2.00, 6.40 2.00, 6.10 2.20, 6.40
    Missing 56 44 12
Corticosteroids
Any oral steroids ongoing at surgery 154 / 484 (32%) 124 / 389 (32%) 30 / 95 (32%) 0.9553
Dose of oral steroids ongoing at surgery


0.2764
    No steroids 330 / 481 (69%) 265 / 386 (69%) 65 / 95 (68%)
    Other steroid 77 / 481 (16%) 66 / 386 (17%) 11 / 95 (12%)
    Prednisone < 20 mg/day 21 / 481 (4.4%) 17 / 386 (4.4%) 4 / 95 (4.2%)
    Prednisone > 20 mg/day 53 / 481 (11%) 38 / 386 (9.8%) 15 / 95 (16%)
Biologics
Any biologic in patient history 273 / 484 (56%) 219 / 389 (56%) 54 / 95 (57%) 0.9243
Exposed to biologics within 8 weeks from surgery vs all others 52 / 479 (11%) 47 / 387 (12%) 5 / 92 (5.4%) 0.0633
Infliximab within 8 weeks of surgery vs more


0.1204
    0 123 / 133 (92%) 95 / 105 (90%) 28 / 28 (100%)
    1 10 / 133 (7.5%) 10 / 105 (9.5%) 0 / 28 (0%)
Adalimumab within 8 weeks of surgery vs more


0.3744
    0 190 / 200 (95%) 157 / 167 (94%) 33 / 33 (100%)
    1 10 / 200 (5.0%) 10 / 167 (6.0%) 0 / 33 (0%)
Vedolizumab within 8 weeks of surgery vs more


>0.9994
    0 40 / 57 (70%) 31 / 45 (69%) 9 / 12 (75%)
    1 17 / 57 (30%) 14 / 45 (31%) 3 / 12 (25%)
Ustekinumab within 8 weeks of surgery vs more


0.4814
    0 41 / 56 (73%) 31 / 44 (70%) 10 / 12 (83%)
    1 15 / 56 (27%) 13 / 44 (30%) 2 / 12 (17%)
Infliximab within 8 weeks from surgery (vs last dose 2-6 months before) 10 / 30 (33%) 10 / 27 (37%) 0 / 3 (0%) 0.5324
Adalimumab within 8 weeks from surgery (vs last dose 2-6 months before) 10 / 42 (24%) 10 / 36 (28%) 0 / 6 (0%) 0.3084
Vedolizumab within 8 weeks from surgery (vs last dose 2-6 months before) 17 / 31 (55%) 14 / 24 (58%) 3 / 7 (43%) 0.6714
Ustekinumab within 8 weeks from surgery (vs last dose 2-6 months before) 14 / 32 (44%) 12 / 28 (43%) 2 / 4 (50%) >0.9994
Number of biologics before surgery


0.4854
    0 211 / 484 (44%) 170 / 389 (44%) 41 / 95 (43%)
    1 140 / 484 (29%) 118 / 389 (30%) 22 / 95 (23%)
    2 80 / 484 (17%) 60 / 389 (15%) 20 / 95 (21%)
    3 44 / 484 (9.1%) 34 / 389 (8.7%) 10 / 95 (11%)
    4 9 / 484 (1.9%) 7 / 389 (1.8%) 2 / 95 (2.1%)
1 n / N (%)
2 Wilcoxon rank sum test
3 Pearson’s Chi-squared test
4 Fisher’s exact test
Code
#> gt::opt_table_font(font = c("Aptos", gt::default_fonts()))
#> gt::tab_options(table.font.names = c("Aptos", gt::default_fonts())) %>%
#> save table as image
#> gt::gtsave(filename = "table1.png")

#> reset_gtsummary_theme()

Regression

All analyses are done with survival package [5].

The final model chosen uses:

  • Gender
  • Charlson Comorbidity Score (as continuous variable)
  • albumin at surgery
  • abscess at surgery
  • recurrence on previously operated site
  • oral steroids exposure
  • biologic therapy within 8 weeks from surgery
  • number of biologic drugs in clinical history

Previous surgery was excluded as recurrence == "No" perfectly predicts this variable.

Biologic exposure was derived as follows:

  • exposure for each drug at surgery with the following sequential criteria
    • if end time over 8 weeks before or start time after surgery → not exposed
    • if end time within 8 weeks from surgery → exposed
    • if start time available, end time not available but no reasons for discontinuation → exposed
    • in all other cases the exposure was set to missing
  • exposure to any biologic drug with the following sequential criteria
    • if previous_biologic == 0 → not exposed
    • if exposed to any drug → exposed
    • if all drug exposures missing → missing
    • if at least one drug not exposed and others missing (which is to say all drugs either not exposed or missing) → not exposed
Code
df_cox_extra <- df_clean %>%
  mutate(
    po_inf_time = case_when(
      is.na(po_inf_time) ~ 30,
      po_inf_time < 0 ~ 30,
      po_inf_time > 30 ~ 30,
      .default = as.numeric(po_inf_time),
    ),
    po_inf_30d = ifelse(po_inf_30d == "Yes", 1, 0),
    surg_procedure_simple = forcats::fct_collapse(
      surg_procedure_simple,
1      "Non-resection" = c("Stoma closure", "Stricturoplasty, fistulectomy or perianal surgery", "Other procedures"),
      "Resection" = c("Resection (non ileocecal)", "Resection ileocecal")
    ),
    sex.factor = structure(sex.factor, label = "Gender"),
  ) %>%
  filter(po_inf_time >= 0) %>% # vs > 0
  select(
    study_id, po_inf_time, po_inf_30d, sex.factor, cci_total_sc, prev_surg_yn,
    alb_surg, recurrence, abscess, abscess_multi, surg_procedure_simple,
    steroid_surg_yn, biologic_surg_8w_yn, biologic_surg_8w_new, biologics_num,
    surg_durat
  )

2model_cox_extra <- survival::coxph(
  survival::Surv(po_inf_time, po_inf_30d) ~ (sex.factor + cci_total_sc +
    recurrence + alb_surg + steroid_surg_yn + biologic_surg_8w_new +
    abscess + surg_procedure_simple + biologics_num),
  id = study_id, data = df_cox_extra, model = TRUE
3)

model_cox_extra_ph_test <-
  survival::cox.zph(model_cox_extra, singledf = TRUE, global = FALSE) %>%
  purrr::pluck("table") %>%
  as.data.frame() %>%
  tibble::rownames_to_column("variable") %>%
  tibble::as_tibble() %>%
  select(variable, ph.test_p.value = p) %>%
  dplyr::mutate(
    # add a column for merging with gtsummary table
    row_type = "label",
    ph.test_p.value = style_pvalue(ph.test_p.value)
  )

gtsummary::tbl_regression(
  model_cox_extra,
  exponentiate = TRUE, estimate_fun = purrr::partial(gtsummary::style_ratio, digits = 3),
  pvalue_fun = purrr::partial(gtsummary::style_sigfig, digits = 3)
) %>%
  add_vif(statistic = "aGVIF") %>%
  add_glance_source_note(
    label = list(
      concordance = "Concordance",
      statistic.log = "Likelihood ratio test",
      p.value.log = "Likelihood ratio p-value"
    ),
    include = c(concordance, statistic.log, p.value.log)
  ) %>%
  # merge in PH test p-values
  modify_table_body(
    ~ .x %>%
      dplyr::left_join(
        model_cox_extra_ph_test,
        by = c("variable", "row_type")
      )
  ) %>%
  #> assign label to PH Test p-value
  modify_header(ph.test_p.value = "**PH Test**") %>%
  bold_labels() %>%
  # modify_header(p.value = "&#x1D45D", text_interpret = "html") %>%
  bold_p()

survminer::ggcoxdiagnostics(model_cox_extra, type = "dfbeta")
1
create surgical procedure by collapsing resections and other types of procedures
2
model with abscesses, albumin at surgery and type of surgical procedure
3
using new biologic_surg_8w_new instead of _yn and excluding prev_surg_yn
`geom_smooth()` using formula = 'y ~ x'

Code
survminer::ggcoxzph(survival::cox.zph(model_cox_extra), font.main = 10, font.x = 8, font.y = 8)

Characteristic HR1 95% CI1 p-value Adjusted GVIF2,1 PH Test
Gender


1.0 0.4
    Female


    Male 1.382 0.864, 2.212 0.177

Charlson Comorbidity Index score 1.136 0.974, 1.326 0.104 1.1 0.6
Recurrence on previously operated site


1.1 0.4
    No


    Yes 1.681 1.036, 2.729 0.036

Albumin at surgery 0.974 0.632, 1.500 0.903 1.0 0.7
Any oral steroids ongoing at surgery


1.0 0.5
    No


    Yes 0.790 0.485, 1.287 0.345

Exposed to biologics within 8 weeks from surgery vs all others


1.0 0.8
    No


    Yes 0.213 0.051, 0.892 0.034

Abscess at surgery


1.1 0.5
    No


    Yes 1.228 0.696, 2.167 0.479

Surgical procedure (simple)


1.0 0.11
    Non-resection


    Resection 11.49 1.580, 83.58 0.016

Number of biologics before surgery


1.0 0.6
    0


    1 1.064 0.602, 1.879 0.832

    2 1.756 0.944, 3.264 0.075

    3 1.282 0.521, 3.155 0.588

    4 2.516 0.589, 10.75 0.213

Concordance = 0.669; Likelihood ratio test = 32.6; Likelihood ratio p-value = 0.001
1 HR = Hazard Ratio, CI = Confidence Interval, GVIF = Generalized Variance Inflation Factor
2 GVIF^[1/(2*df)]

DB differences

We analyze database differences importing the data from the SPSS dataset and comparing with the .csv dataset using the package arsenal [6].

Code
df_matteo <- haven::read_sav("Crohn tesi 2024-12-21_cox.sav")
df_cox_matteo <- df_matteo |>
  mutate(
    study_id = as.character(study_id),
    #> sex = case_when(
    #>   study_id == 2180024326 ~ 0,
    #>   study_id == 2971033025 ~ 1,
    #>   study_id == 1039013496 ~ 1,
    #>   study_id == 2071039462 ~ 1,
    #>   study_id == "1399.049.496" ~ 1,
    #>   .default = sex,
    #> ),
    #> alb_surg = case_when(
    #>   study_id == "1129.028.932" ~ 3.5,
    #>   study_id == "1129.029.624" ~ 4.1,
    #>   study_id == "1469030052" ~ 4,
    #>   study_id == "1509028212" ~ 4,
    #>   study_id == "1609153250" ~ 3.5,
    #>   study_id == "1819028829" ~ 3.9,
    #>   study_id == "2061023413" ~ 4.01,
    #>   study_id == "2070045907" ~ 3.9,
    #>   study_id == "2071039462" ~ 4.2,
    #>   study_id == "2111.017.128" ~ 3.4,
    #>   study_id == "2341022000" ~ 4.4,
    #>   study_id == "2390022853" ~ 4.49,
    #>   study_id == "2402152156" ~ 3.74,
    #>   study_id == "2422046048" ~ 4.4,
    #>   study_id == "2431022794" ~ 3.7,
    #>   study_id == "2712027023" ~ 3.76,
    #>   study_id == "2750020936" ~ 3.8,
    #>   study_id == "2771022836" ~ 4.3,
    #>   study_id == "2832035063" ~ 3.8,
    #>   .default = alb_surg,
    #> ),
    sex.factor = haven::as_factor(sex),
    recurrence = structure(haven::as_factor(recurrence), label = "Recurrence on previously operated site"),
    abscess = structure(haven::as_factor(abscess), label = "Abscess at surgery"),
    cci_total_sc = structure(as.integer(cci_total_sc), label = "Charlson Comorbidity Index score"),
    alb_surg = structure(as.numeric(alb_surg), label = "Albumin at surgery", units = "g/dL"),
    steroid_surgery14days = haven::as_factor(steroid_surgery14days),
    steroid_surgery14days = structure(forcats::fct_recode(steroid_surgery14days, "No" = "0", "Yes" = "1"),
      label = "Any oral steroids ongoing at surgery"
    ),
    prev_surgery = haven::as_factor(prev_surgery),
    prev_surgery = structure(forcats::fct_recode(prev_surgery, "No" = "0", "Yes" = "1"),
      label = "Any previous major abdominal surgery"
    ),
    Resection = haven::as_factor(Resection),
    Resection = structure(forcats::fct_recode(Resection, "Non-resection" = "0", "Resection" = "1"),
      label = "Surgical procedure"
    ),
    infezione30d = as.numeric(infezione30d),
    surgery_to_inf = as.numeric(surgery_to_inf),
    surgery_to_infection_COX = as.numeric(surgery_to_infection_COX),
    bio_wt8w_vsallother = haven::as_factor(bio_wt8w_vsallother),
    bio_wt8w_vsallother = structure(forcats::fct_recode(bio_wt8w_vsallother, "No" = "0", "Yes" = "1"),
      label = "Exposed to biologics within 8 weeks from surgery vs more or never"
    ),
    N_biologics = structure(haven::as_factor(N_biologics), label = "Number of biologics before surgery"),
  ) |>
  select(
    study_id, # same
    sex.factor, # mutated from df_matteo$sex
    cci_total_sc, # mutated
    recurrence, # mutated
    abscess, # mutated
    alb_surg, # same
    infezione30d, # po_inf_30d
    surgery_to_inf, # po_inf_time before transformation
    surgery_to_infection_COX, # po_inf_time
    discont_to_sugery, # missing
    bio_whitin_8weeks, # missing
    bio_whitin_4weeks, # missing
    bio_wt8w_vsallother, # biologic_surg_8w_yn
    bio_wt4w_vsallother, # missing
    Resection, # surg_procedure_simple
    steroid_end_to_surgery, # missing
    steroid_start_to_surgery, # missing
    steroid_surgery14days, # steroid_surg_yn
    prev_surgery, # prev_surg_yn
    N_biologics # biologics_num
  ) |>
  rename(
    po_inf_30d = infezione30d,
    po_inf_time = surgery_to_infection_COX,
    biologic_surg_8w_yn = bio_wt8w_vsallother,
    surg_procedure_simple = Resection,
    steroid_surg_yn = steroid_surgery14days,
    prev_surg_yn = prev_surgery,
    biologics_num = N_biologics
  )

whats_different <- arsenal::comparedf(df_cox_matteo, df_cox_extra, by = "study_id")
summary(whats_different)


Table: Summary of data.frames

version   arg              ncol   nrow
--------  --------------  -----  -----
x         df_cox_matteo      20    487
y         df_cox_extra       16    485



Table: Summary of overall comparison

statistic                                                      value
------------------------------------------------------------  ------
Number of by-variables                                             1
Number of non-by variables in common                              12
Number of variables compared                                      12
Number of variables in x but not y                                 7
Number of variables in y but not x                                 3
Number of variables compared with some values unequal              9
Number of variables compared with all values equal                 3
Number of observations in common                                 485
Number of observations in x but not y                              2
Number of observations in y but not x                              0
Number of observations with some compared variables unequal       55
Number of observations with all compared variables equal         430
Number of values unequal                                          62



Table: Variables not shared

version   variable                    position  class   
--------  -------------------------  ---------  --------
x         surgery_to_inf                     8  numeric 
x         discont_to_sugery                 10  numeric 
x         bio_whitin_8weeks                 11  numeric 
x         bio_whitin_4weeks                 12  numeric 
x         bio_wt4w_vsallother               14  numeric 
x         steroid_end_to_surgery            16  numeric 
x         steroid_start_to_surgery          17  numeric 
y         abscess_multi                     10  factor  
y         biologic_surg_8w_new              14  factor  
y         surg_durat                        16  integer 



Table: Other variables not compared

                                 
 --------------------------------
 No other variables not compared 
 --------------------------------



Table: Observations not shared

version   study_id        observation
--------  -------------  ------------
x         1758.168.350            466
x         2110024554              464



Table: Differences detected by variable

var.x                   var.y                     n   NAs
----------------------  ----------------------  ---  ----
sex.factor              sex.factor                4     4
cci_total_sc            cci_total_sc              0     0
recurrence              recurrence                0     0
abscess                 abscess                   5     5
alb_surg                alb_surg                 19    19
po_inf_30d              po_inf_30d                0     0
po_inf_time             po_inf_time               4     0
biologic_surg_8w_yn     biologic_surg_8w_yn      17     6
surg_procedure_simple   surg_procedure_simple     5     5
steroid_surg_yn         steroid_surg_yn           1     1
prev_surg_yn            prev_surg_yn              2     0
biologics_num           biologics_num             5     1



Table: Differences detected (16 not shown)

var.x                   var.y                   study_id       values.x        values.y    row.x   row.y
----------------------  ----------------------  -------------  --------------  ---------  ------  ------
sex.factor              sex.factor              1039013496     NA              Male          251     477
sex.factor              sex.factor              1399.049.496   NA              Male          279     405
sex.factor              sex.factor              2071039462     NA              Male          384     151
sex.factor              sex.factor              2971033025     NA              Male          258     159
abscess                 abscess                 2190022752     No              NA            418     352
abscess                 abscess                 2231022792     No              NA            354     320
abscess                 abscess                 2300.038.000   No              NA             92     297
abscess                 abscess                 2841015373     No              NA            197     256
abscess                 abscess                 2882031350     No              NA            377      71
alb_surg                alb_surg                1129.028.932   NA              3.5           389     441
alb_surg                alb_surg                1129.029.624   NA              4.1           239     439
alb_surg                alb_surg                1469030052     NA              4              32     438
alb_surg                alb_surg                1509028212     NA              4              31     443
alb_surg                alb_surg                1609153250     NA              3.5            28     445
alb_surg                alb_surg                1819028829     NA              3.9           240     442
alb_surg                alb_surg                2061023413     NA              4.01          388     210
alb_surg                alb_surg                2070045907     NA              3.9            33     283
alb_surg                alb_surg                2071039462     NA              4.2           384     151
alb_surg                alb_surg                2111.017.128   NA              3.4            97     248
po_inf_time             po_inf_time             2112026730     6               30            246      75
po_inf_time             po_inf_time             2441026566     6               30            348     186
po_inf_time             po_inf_time             2512041485     6               30            290      41
po_inf_time             po_inf_time             2952036371     6               30            350      55
biologic_surg_8w_yn     biologic_surg_8w_yn     1029016050     No              NA            337     471
biologic_surg_8w_yn     biologic_surg_8w_yn     1129.029.624   No              NA            239     439
biologic_surg_8w_yn     biologic_surg_8w_yn     1899.022.067   No              NA            462     458
biologic_surg_8w_yn     biologic_surg_8w_yn     2022530658     No              NA            451      53
biologic_surg_8w_yn     biologic_surg_8w_yn     2222017534     No              Yes           435     100
biologic_surg_8w_yn     biologic_surg_8w_yn     2261034206     No              Yes           276     158
biologic_surg_8w_yn     biologic_surg_8w_yn     2271028772     No              Yes           444     202
biologic_surg_8w_yn     biologic_surg_8w_yn     2272029464     No              Yes           479      74
biologic_surg_8w_yn     biologic_surg_8w_yn     2381036009     No              Yes           468     141
biologic_surg_8w_yn     biologic_surg_8w_yn     2382020366     No              NA            333      90
surg_procedure_simple   surg_procedure_simple   2212035047     Non-resection   NA            219      54
surg_procedure_simple   surg_procedure_simple   2222017534     Non-resection   NA            435     100
surg_procedure_simple   surg_procedure_simple   2390022853     Non-resection   NA            289     348
surg_procedure_simple   surg_procedure_simple   2662030942     Non-resection   NA            439      73
surg_procedure_simple   surg_procedure_simple   2800022769     Non-resection   NA            352     351
steroid_surg_yn         steroid_surg_yn         2811111878     No              NA            212     252
prev_surg_yn            prev_surg_yn            2551047268     No              Yes           238     124
prev_surg_yn            prev_surg_yn            2871026699     Yes             No            452     211
biologics_num           biologics_num           1619028124     1               2             216     446
biologics_num           biologics_num           2031039026     2               3             356     156
biologics_num           biologics_num           2452032712     1               2             217      62
biologics_num           biologics_num           2811111878     0               NA            212     252
biologics_num           biologics_num           2940042906     1               2             215     289



Table: Non-identical attributes

var.x                   var.y                   name  
----------------------  ----------------------  ------
surg_procedure_simple   surg_procedure_simple   label 
Code
arsenal::diffs(whats_different, by.var = TRUE) |> knitr::kable()
var.x var.y n NAs
sex.factor sex.factor 4 4
cci_total_sc cci_total_sc 0 0
recurrence recurrence 0 0
abscess abscess 5 5
alb_surg alb_surg 19 19
po_inf_30d po_inf_30d 0 0
po_inf_time po_inf_time 4 0
biologic_surg_8w_yn biologic_surg_8w_yn 17 6
surg_procedure_simple surg_procedure_simple 5 5
steroid_surg_yn steroid_surg_yn 1 1
prev_surg_yn prev_surg_yn 2 0
biologics_num biologics_num 5 1
Code
arsenal::diffs(whats_different) |> knitr::kable()
var.x var.y study_id values.x values.y row.x row.y
sex.factor sex.factor 1039013496 NA Male 251 477
sex.factor sex.factor 1399.049.496 NA Male 279 405
sex.factor sex.factor 2071039462 NA Male 384 151
sex.factor sex.factor 2971033025 NA Male 258 159
abscess abscess 2190022752 No NA 418 352
abscess abscess 2231022792 No NA 354 320
abscess abscess 2300.038.000 No NA 92 297
abscess abscess 2841015373 No NA 197 256
abscess abscess 2882031350 No NA 377 71
alb_surg alb_surg 1129.028.932 NA 3.5 389 441
alb_surg alb_surg 1129.029.624 NA 4.1 239 439
alb_surg alb_surg 1469030052 NA 4 32 438
alb_surg alb_surg 1509028212 NA 4 31 443
alb_surg alb_surg 1609153250 NA 3.5 28 445
alb_surg alb_surg 1819028829 NA 3.9 240 442
alb_surg alb_surg 2061023413 NA 4.01 388 210
alb_surg alb_surg 2070045907 NA 3.9 33 283
alb_surg alb_surg 2071039462 NA 4.2 384 151
alb_surg alb_surg 2111.017.128 NA 3.4 97 248
alb_surg alb_surg 2341022000 NA 4.4 29 240
alb_surg alb_surg 2390022853 NA 4.49 289 348
alb_surg alb_surg 2402152156 NA 3.74 100 61
alb_surg alb_surg 2422046048 NA 4.4 332 31
alb_surg alb_surg 2431022794 NA 3.7 98 232
alb_surg alb_surg 2712027023 NA 3.76 168 76
alb_surg alb_surg 2750020936 NA 3.8 117 363
alb_surg alb_surg 2771022836 NA 4.3 30 234
alb_surg alb_surg 2832035063 NA 3.8 314 60
po_inf_time po_inf_time 2112026730 6 30 246 75
po_inf_time po_inf_time 2441026566 6 30 348 186
po_inf_time po_inf_time 2512041485 6 30 290 41
po_inf_time po_inf_time 2952036371 6 30 350 55
biologic_surg_8w_yn biologic_surg_8w_yn 1029016050 No NA 337 471
biologic_surg_8w_yn biologic_surg_8w_yn 1129.029.624 No NA 239 439
biologic_surg_8w_yn biologic_surg_8w_yn 1899.022.067 No NA 462 458
biologic_surg_8w_yn biologic_surg_8w_yn 2022530658 No NA 451 53
biologic_surg_8w_yn biologic_surg_8w_yn 2222017534 No Yes 435 100
biologic_surg_8w_yn biologic_surg_8w_yn 2261034206 No Yes 276 158
biologic_surg_8w_yn biologic_surg_8w_yn 2271028772 No Yes 444 202
biologic_surg_8w_yn biologic_surg_8w_yn 2272029464 No Yes 479 74
biologic_surg_8w_yn biologic_surg_8w_yn 2381036009 No Yes 468 141
biologic_surg_8w_yn biologic_surg_8w_yn 2382020366 No NA 333 90
biologic_surg_8w_yn biologic_surg_8w_yn 2452032712 No Yes 217 62
biologic_surg_8w_yn biologic_surg_8w_yn 2561046387 No Yes 396 127
biologic_surg_8w_yn biologic_surg_8w_yn 2581032843 No Yes 480 174
biologic_surg_8w_yn biologic_surg_8w_yn 2662059257 No Yes 244 20
biologic_surg_8w_yn biologic_surg_8w_yn 2681010361 No Yes 366 279
biologic_surg_8w_yn biologic_surg_8w_yn 2811111878 No NA 212 252
biologic_surg_8w_yn biologic_surg_8w_yn PPPMNL73R25C134R No Yes 454 4
surg_procedure_simple surg_procedure_simple 2212035047 Non-rese…. NA 219 54
surg_procedure_simple surg_procedure_simple 2222017534 Non-rese…. NA 435 100
surg_procedure_simple surg_procedure_simple 2390022853 Non-rese…. NA 289 348
surg_procedure_simple surg_procedure_simple 2662030942 Non-rese…. NA 439 73
surg_procedure_simple surg_procedure_simple 2800022769 Non-rese…. NA 352 351
steroid_surg_yn steroid_surg_yn 2811111878 No NA 212 252
prev_surg_yn prev_surg_yn 2551047268 No Yes 238 124
prev_surg_yn prev_surg_yn 2871026699 Yes No 452 211
biologics_num biologics_num 1619028124 1 2 216 446
biologics_num biologics_num 2031039026 2 3 356 156
biologics_num biologics_num 2452032712 1 2 217 62
biologics_num biologics_num 2811111878 0 NA 212 252
biologics_num biologics_num 2940042906 1 2 215 289
Code
dplyr::full_join(df_cox_matteo, df_clean, by = "study_id") |>
  filter(biologic_surg_8w_yn.x != biologic_surg_8w_yn.y) |>
  select(
    study_id, po_inf_30d.y, biologic_surg_8w_yn.x, biologic_surg_8w_yn.y, biologic_surg_8w_new,
    matches("(infliximab|adalimumab|vedolizumab|ustekinumab)_ongoing")
  )
# A tibble: 11 × 9
   study_id         po_inf_30d.y biologic_surg_8w_yn.x biologic_surg_8w_yn.y
   <chr>            <fct>        <fct>                 <fct>                
 1 2452032712       Yes          No                    Yes                  
 2 2662059257       No           No                    Yes                  
 3 2261034206       No           No                    Yes                  
 4 2681010361       No           No                    Yes                  
 5 2561046387       No           No                    Yes                  
 6 2222017534       No           No                    Yes                  
 7 2271028772       No           No                    Yes                  
 8 PPPMNL73R25C134R No           No                    Yes                  
 9 2381036009       Yes          No                    Yes                  
10 2272029464       No           No                    Yes                  
11 2581032843       Yes          No                    Yes                  
# ℹ 5 more variables: biologic_surg_8w_new <fct>, infliximab_ongoing <fct>,
#   adalimumab_ongoing <fct>, vedolizumab_ongoing <fct>,
#   ustekinumab_ongoing <fct>
Code
dplyr::full_join(df_cox_matteo, df_clean, by = "study_id") %>%
  gtsummary::tbl_summary(
    data = .,
    include = c(
      "previous_biologics", "biologic_surg_8w_yn.x", "biologic_surg_8w_yn.y",
      "biologic_surg_8w_new", "infliximab_ongoing", "adalimumab_ongoing",
      "vedolizumab_ongoing", "ustekinumab_ongoing"
    ),
    by = po_inf_30d.y,
    statistic = list(
      all_categorical() ~ "{n} / {N} ({p}%)"
    ),
  ) |>
  add_p(
    test.args = all_tests("chisq.test") ~ list(correct = TRUE),
    pvalue_fun = label_style_pvalue(digits = 3)
  ) |>
  separate_p_footnotes() |>
  bold_p() |>
  add_overall() |>
  bold_labels()
Characteristic Overall
N = 4851
No
N = 3901
Yes
N = 951
p-value
Any biologic in patient history 273 / 484 (56%) 219 / 389 (56%) 54 / 95 (57%) 0.9242
    Unknown 1 1 0
Exposed to biologics within 8 weeks from surgery vs more or never 46 / 485 (9.5%) 41 / 390 (11%) 5 / 95 (5.3%) 0.1172
Exposed to biologics within 8 weeks from surgery vs more or never 57 / 479 (12%) 49 / 387 (13%) 8 / 92 (8.7%) 0.2912
    Unknown 6 3 3
Exposed to biologics within 8 weeks from surgery vs all others 52 / 479 (11%) 47 / 387 (12%) 5 / 92 (5.4%) 0.0632
    Unknown 6 3 3
Infliximab within 8 weeks of surgery vs more


0.1203
    0 123 / 133 (92%) 95 / 105 (90%) 28 / 28 (100%)
    1 10 / 133 (7.5%) 10 / 105 (9.5%) 0 / 28 (0%)
    Unknown 352 285 67
Adalimumab within 8 weeks of surgery vs more


0.3743
    0 190 / 200 (95%) 157 / 167 (94%) 33 / 33 (100%)
    1 10 / 200 (5.0%) 10 / 167 (6.0%) 0 / 33 (0%)
    Unknown 285 223 62
Vedolizumab within 8 weeks of surgery vs more


>0.9993
    0 40 / 57 (70%) 31 / 45 (69%) 9 / 12 (75%)
    1 17 / 57 (30%) 14 / 45 (31%) 3 / 12 (25%)
    Unknown 428 345 83
Ustekinumab within 8 weeks of surgery vs more


0.4813
    0 41 / 56 (73%) 31 / 44 (70%) 10 / 12 (83%)
    1 15 / 56 (27%) 13 / 44 (30%) 2 / 12 (17%)
    Unknown 429 346 83
1 n / N (%)
2 Pearson’s Chi-squared test
3 Fisher’s exact test

Which biologic was someone exposed to?

Attempt to visualise which was the biologic drug in question.

Code
df_clean |>
  filter(if_any(matches("(infliximab|adalimumab|ustekinumab|vedolizumab)_ongoing"), ~ .x == 1)) |>
  select(
    matches("(infliximab|adalimumab|ustekinumab|vedolizumab)_ongoing"),
    biologic_surg_8w_new,
    previous_biologics,
    po_inf_30d
  ) |>
  mutate(
    which_biologic = factor(
      case_when(
        infliximab_ongoing == 1 ~ "Infliximab",
        adalimumab_ongoing == 1 ~ "Adalimumab",
        vedolizumab_ongoing == 1 ~ "Vedolizumab",
        ustekinumab_ongoing == 1 ~ "Ustekinumab",
        rowSums(across(matches("(infliximab|adalimumab|ustekinumab|vedolizumab)_ongoing"), as.numeric)) > 1 ~ "Multiple",
        .default = NA,
      ),
    ),
    po_inf_30d = paste("Post-surgical infection within 30 days", po_inf_30d),
  ) |>
  gtsummary::tbl_strata(
    strata = po_inf_30d,
    .tbl_fun =
      ~ .x |>
        tbl_summary(include = biologic_surg_8w_new, by = which_biologic, missing = "no") |>
        add_n() |>
        add_p(),
    .combine_with = "tbl_stack",
  )
Characteristic N Adalimumab
N = 101
Infliximab
N = 101
Ustekinumab
N = 131
Vedolizumab
N = 141
p-value2
Post-surgical infection within 30 days No
Exposed to biologics within 8 weeks from surgery vs all others 47 10 (100%) 10 (100%) 13 (100%) 14 (100%) >0.9
Post-surgical infection within 30 days Yes
Exposed to biologics within 8 weeks from surgery vs all others 5 0 (NA%) 0 (NA%) 2 (100%) 3 (100%) >0.9
1 n (%)
2 Fisher’s exact test

Redo Cox with Matteo

Code
model_cox_matteo <- survival::coxph(
  survival::Surv(po_inf_time, po_inf_30d) ~ (sex.factor + cci_total_sc +
    recurrence + alb_surg + steroid_surg_yn + biologic_surg_8w_yn + prev_surg_yn +
    abscess + surg_procedure_simple + biologics_num),
  id = study_id,
  data = df_cox_matteo |> filter(study_id != "1758.168.350") |> filter(study_id != "2110024554"),
  model = TRUE
)

model_cox_matteo_ph_test <-
  survival::cox.zph(model_cox_matteo, singledf = TRUE, global = FALSE) %>%
  purrr::pluck("table") %>%
  as.data.frame() %>%
  tibble::rownames_to_column("variable") %>%
  tibble::as_tibble() %>%
  select(variable, ph.test_p.value = p) %>%
  dplyr::mutate(
    # add a column for merging with gtsummary table
    row_type = "label",
    ph.test_p.value = style_pvalue(ph.test_p.value)
  )

gtsummary::tbl_regression(
  model_cox_matteo,
  exponentiate = TRUE, estimate_fun = purrr::partial(gtsummary::style_ratio, digits = 3),
  pvalue_fun = purrr::partial(gtsummary::style_sigfig, digits = 3)
) %>%
  add_vif(statistic = "aGVIF") %>%
  add_glance_source_note(
    label = list(
      concordance = "Concordance",
      statistic.log = "Likelihood ratio test",
      p.value.log = "Likelihood ratio p-value"
    ),
    include = c(concordance, statistic.log, p.value.log)
  ) %>%
  # merge in PH test p-values
  modify_table_body(
    ~ .x %>%
      dplyr::left_join(
        model_cox_extra_ph_test,
        by = c("variable", "row_type")
      )
  ) %>%
  #> assign label to PH Test p-value
  modify_header(ph.test_p.value = "**PH Test**") %>%
  bold_labels() %>%
  bold_p()
Characteristic HR1 95% CI1 p-value Adjusted GVIF2,1 PH Test
Gender


1.0 0.4
    Female


    Male 1.386 0.871, 2.205 0.169

Charlson Comorbidity Index score 1.146 0.986, 1.332 0.075 1.1 0.6
Recurrence on previously operated site


1.1 0.4
    No


    Yes 1.743 1.039, 2.926 0.035

Albumin at surgery 0.937 0.615, 1.429 0.763 1.0 0.7
Any oral steroids ongoing at surgery


1.0 0.5
    No


    Yes 0.826 0.508, 1.344 0.441

Exposed to biologics within 8 weeks from surgery vs more or never


1.0
    No


    Yes 0.375 0.115, 1.224 0.104

Any previous major abdominal surgery


1.1
    No


    Yes 0.901 0.486, 1.673 0.742

Abscess at surgery


1.1 0.5
    No


    Yes 1.049 0.579, 1.901 0.874

Surgical procedure


1.0 0.11
    Non-resection


    Resection 4.025 1.242, 13.04 0.020

Number of biologics before surgery


1.0 0.6
    0


    1 0.956 0.536, 1.703 0.878

    2 1.704 0.901, 3.221 0.101

    3 1.352 0.601, 3.042 0.466

    4 2.309 0.538, 9.920 0.260

Concordance = 0.663; Likelihood ratio test = 25.0; Likelihood ratio p-value = 0.023
1 HR = Hazard Ratio, CI = Confidence Interval, GVIF = Generalized Variance Inflation Factor
2 GVIF^[1/(2*df)]
Code
survminer::ggcoxdiagnostics(model_cox_matteo, type = "dfbeta")

Code
survminer::ggcoxzph(survival::cox.zph(model_cox_matteo), font.main = 10, font.x = 8, font.y = 8)

MICE

Here we attempt to replicate the analysis imputing values for alb_surg, which is the one missing from most entries and is normally distributed. For this we use the mice package [7].

Code
#> pacman::p_load(mice, cowplot, ggsci)
#> scales::show_col(ggsci::pal_nejm("default")(4))
df_imp <- df_cox_extra |>
  select(-study_id, -biologic_surg_8w_yn, -abscess_multi)

mice_imp <- data.frame(
  original = df_imp$alb_surg,
  imp_pmm = complete(mice::mice(df_imp,
    method = ifelse(colnames(df_imp) == "alb_surg", "pmm", ""),
    print = FALSE
  ))$alb_surg,
  imp_cart = complete(mice::mice(df_imp,
    method = ifelse(colnames(df_imp) == "alb_surg", "cart", ""),
    print = FALSE
  ))$alb_surg,
  imp_rf = complete(mice::mice(df_imp,
    method = ifelse(colnames(df_imp) == "alb_surg", "cart", ""),
    print = FALSE
  ))$alb_surg
)

h1 <- ggplot(mice_imp, aes(x = original)) +
  geom_histogram(fill = "#BC3C29FF", color = "#000000", position = "identity") +
  ggtitle("Original distribution") +
  theme_classic()
h2 <- ggplot(mice_imp, aes(x = imp_pmm)) +
  geom_histogram(fill = "#0072B5FF", color = "#000000", position = "identity") +
  ggtitle("PMM distribution") +
  theme_classic()
h3 <- ggplot(mice_imp, aes(x = imp_cart)) +
  geom_histogram(fill = "#E18727FF", color = "#000000", position = "identity") +
  ggtitle("CART distribution") +
  theme_classic()
h4 <- ggplot(mice_imp, aes(x = imp_rf)) +
  geom_histogram(fill = "#20854EFF", color = "#000000", position = "identity") +
  ggtitle("Random Forest distribution") +
  theme_classic()

cowplot::plot_grid(h1, h2, h3, h4, nrow = 2, ncol = 2)

Code
df_imp_res <- df_cox_extra |>
  select(-study_id, -biologic_surg_8w_yn, -abscess_multi) |>
  mice::mice(method = ifelse(colnames(df_imp) == "alb_surg", "cart", ""), print = FALSE)

mice::stripplot(df_imp_res, alb_surg ~ .imp, col = c("grey", mice::mdc(2)), pch = c(1, 20))

Code
mice::densityplot(df_imp_res, ~ alb_surg | .imp, pch = 20, cex = 1.5)

Code
model_cox_imp <- with(
  data = df_imp_res,
  #> broom::tidy(
  #> summary(
  survival::coxph(
    survival::Surv(po_inf_time, po_inf_30d) ~
      sex.factor + cci_total_sc + recurrence + alb_surg +
      steroid_surg_yn + biologic_surg_8w_new + biologics_num +
      abscess + surg_procedure_simple + prev_surg_yn
  ),
  #> exponentiate = TRUE,
  #> conf.int = TRUE
  #> )
)

summary(mice::pool(model_cox_imp)) |> knitr::kable()
term estimate std.error statistic df p.value
sex.factorMale 0.3473245 0.2267311 1.5318787 71.07183 0.1299918
cci_total_sc 0.1357433 0.0764380 1.7758605 71.07183 0.0800364
recurrenceYes 0.4491682 0.2543048 1.7662588 71.06154 0.0816472
alb_surg 0.0587630 0.2012136 0.2920430 70.44014 0.7711122
steroid_surg_ynYes -0.1828320 0.2376310 -0.7693946 71.07183 0.4442087
biologic_surg_8w_newYes -1.3803855 0.6054306 -2.2800063 71.07183 0.0256103
biologics_num1 -0.1139020 0.2862309 -0.3979375 71.07183 0.6918690
biologics_num2 0.5717519 0.2975060 1.9218163 71.07183 0.0586386
biologics_num3 0.4518178 0.4099118 1.1022319 71.07107 0.2740795
biologics_num4 0.6610508 0.7387029 0.8948805 71.07183 0.3738717
abscessYes 0.2831138 0.2685225 1.0543392 71.06155 0.2952993
surg_procedure_simpleResection 1.1731560 0.5251697 2.2338606 71.07183 0.0286400
prev_surg_ynYes 0.0475606 0.2991728 0.1589736 71.07042 0.8741406
Code
#> df_step <- df_cox_extra |>
#>   select(-study_id, -biologic_surg_8w_yn, -abscess_multi) |>
#>   na.omit()
#> # step_formula <- survival::Surv(po_inf_time, po_inf_30d) ~ .
#> step_formula = survival::Surv(po_inf_time, po_inf_30d) ~ (
#>         sex.factor + cci_total_sc + recurrence + alb_surg +
#>         steroid_surg_yn + biologic_surg_8w_new + biologics_num +
#>         abscess + surg_procedure_simple + prev_surg_yn)
#> StepReg::stepwise(
#>   formula = step_formula,
#>   data = df_step,
#>   type = "cox",
#>   strategy = "bidirection",
#>   test_method_cox = "efron",
#>   metric = "AICc",
#> )

Infections

Tables for infection types

Code
df_epi <- df_clean %>%
  filter(po_inf_30d == "Yes") %>%
  mutate(
    po_inf_type___1 = structure(factor(po_inf_type___1, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Bone and joint"
    ),
    po_inf_type___2 = structure(factor(po_inf_type___2, levels = c(0, 1), labels = c("No", "Yes")),
      label = "BSI"
    ),
    po_inf_type___3 = structure(factor(po_inf_type___3, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Central line associated BSI"
    ),
    po_inf_type___4 = structure(factor(po_inf_type___4, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Central nervous system"
    ),
    po_inf_type___5 = structure(factor(po_inf_type___5, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Cardiovascular"
    ),
    po_inf_type___6 = structure(factor(po_inf_type___6, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Intraabdominal, excluding organ/space SSI"
    ),
    po_inf_type___7 = structure(factor(po_inf_type___7, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Pneumonia"
    ),
    po_inf_type___8 = structure(factor(po_inf_type___8, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Surgical site infection"
    ),
    po_inf_type___9 = structure(factor(po_inf_type___9, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Skin and soft tissue, excluding superficial SSI"
    ),
    po_inf_type___10 = structure(factor(po_inf_type___10, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Urinary tract"
    ),
    po_inf_type___11 = structure(factor(po_inf_type___11, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Other"
    ),
    po_inf_ia_loc___1 = structure(factor(po_inf_ia_loc___1, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Gastric"
    ),
    po_inf_ia_loc___2 = structure(factor(po_inf_ia_loc___2, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Hepato-biliary"
    ),
    po_inf_ia_loc___3 = structure(factor(po_inf_ia_loc___3, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Pancreatic"
    ),
    po_inf_ia_loc___4 = structure(factor(po_inf_ia_loc___4, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Small intestine"
    ),
    po_inf_ia_loc___5 = structure(factor(po_inf_ia_loc___5, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Colon"
    ),
    po_inf_ia_loc___6 = structure(factor(po_inf_ia_loc___6, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Multiple localisations"
    ),
    ssi_type = structure(
      factor(ssi_type,
        levels = c(1:3),
        labels = c("Superficial incisional SSI", "Deep incisional SSI", "Organ/space SSI")
      ),
      label = "Type of SSI"
    ),
  )

inf_type <- df_epi %>%
  select(starts_with("po_inf_type___")) %>%
  tbl_summary(data = .) %>%
  modify_table_body(
    ~ .x |> tibble::add_row(
      variable = "po_inf_type_group",
      label = "Type of post-surgery infection",
      .before = 1L
    )
  ) |>
  modify_column_indent(
    columns = "label",
    rows = !(label == "Type of post-surgery infection"),
    indent = 4L
  )

inf_type_ia <- df_epi %>%
  filter(po_inf_type___6 == "Yes") %>%
  select(starts_with("po_inf_ia_loc___")) %>%
  tbl_summary(data = .) |>
  modify_table_body(
    fun = ~ .x %>%
      mutate(
        stat = as.integer(gsub("\\(.*\\%\\)", "", stat_0))
      ) %>%
      arrange(ifelse(variable != "po_inf_ia_loc___6", desc(stat), match(variable, c("po_inf_ia_loc___6"))))
  ) |>
  modify_table_body(
    ~ .x |> tibble::add_row(
      variable = "po_inf_ia_loc_group",
      label = "Location of intra-abdominal infections",
      .before = 1L
    )
  ) |>
  modify_column_indent(
    columns = "label",
    rows = !(label == "Location of intra-abdominal infections"),
    indent = 4L
  )

inf_type_ssi <- df_epi %>%
  filter(po_inf_type___8 == "Yes") %>%
  select(ssi_type) %>%
  tbl_summary(data = .)

tbl_stack(list(inf_type, inf_type_ia, inf_type_ssi), quiet = TRUE) %>%
  modify_table_body(
    fun = ~ .x %>%
      arrange(
        match(variable, c("po_inf_type_group")),
        match(variable, "po_inf_type___8"),
        match(tbl_id1, c("3")),
        match(variable, "po_inf_type___6"),
        match(tbl_id1, c("2"))
      )
  ) %>%
  remove_row_type(ssi_type, type = "header") %>%
  remove_row_type("po_inf_ia_loc_group", type = "all") %>%
  modify_column_indent(
    columns = label,
    rows = (grepl("po_inf_ia_loc___", variable) | variable == "ssi_type"),
    indent = 8L
  )
Characteristic N = 951
Type of post-surgery infection
    Surgical site infection 48 (51%)
        Superficial incisional SSI 7 (15%)
        Deep incisional SSI 15 (31%)
        Organ/space SSI 26 (54%)
    Intraabdominal, excluding organ/space SSI 22 (23%)
        Small intestine 9 (41%)
        Colon 6 (27%)
        Hepato-biliary 3 (14%)
        Pancreatic 1 (4.5%)
        Gastric 0 (0%)
        Multiple localisations 9 (41%)
    Bone and joint 0 (0%)
    BSI 4 (4.2%)
    Central line associated BSI 4 (4.2%)
    Central nervous system 0 (0%)
    Cardiovascular 0 (0%)
    Pneumonia 22 (23%)
    Skin and soft tissue, excluding superficial SSI 0 (0%)
    Urinary tract 2 (2.1%)
    Other 11 (12%)
1 n (%)

Tables for epidemiology

Code
df_micro <- df_clean %>%
  select(
    "study_id",
    "po_inf_30d",
    "cre_colonisation_pre",
    "cre_colonisation_mechanism___1", # KPC
    "cre_colonisation_mechanism___2", # OXA-48
    "cre_colonisation_mechanism___3", # VIM
    "cre_colonisation_mechanism___4", # NDM
    "cre_colonisation_mechanism___5", # Other
    "micro_yn",
    "etiology_yn",
    "etiology_sample_ssi___1", # wound swab culture
    "etiology_sample_ssi___2", # purulent drainage culture
    "etiology_sample_ssi___3", # superficial tissue culture
    "etiology_sample_ssi___4", # deep tissue culture
    "etiology_sample_ssi___5", # ascitic fluid
    "etiology_sample_ssi___6", # organ biopsy
    "etiology_sample_ssi___7", # other
    "etiology_type___1", # Gram -
    "etiology_type___2", # Gram +
    "etiology_type___3", # Fungi
    "etiology_type___4", # other
    "etiology_type_other", # which other
    "etiology_gn___1", # Enterobacteriacea
    "etiology_gn___2", # Pseudomonas spp
    "etiology_gn___3", # Acinetobacter spp
    "etiology_gn___4", # Stenotrophomonas spp
    "etiology_gn___5", # other
    "etiology_gn_other", # which other
    "etiology_enterob___1", # Escherichia coli
    "etiology_enterob___2", # Klebsiella spp
    "etiology_enterob___3", # Enterobacter spp
    "etiology_enterob___4", # Citrobacter spp
    "etiology_enterob___5", # Serratia spp
    "etiology_enterob___6", # Morganella spp
    "etiology_enterob___7", # Proteus spp
    "etiology_enterob___8", # Salmonella spp
    "etiology_enterob___9", # other
    "etiology_gn_resistance___1", # None
    "etiology_gn_resistance___2", # 3rd generation cephalosporins
    "etiology_gn_resistance___3", # 4th generation cephalosporins
    "etiology_gn_resistance___4", # fluoroquinolones
    "etiology_gn_resistance___5", # BL/BLI
    "etiology_gn_resistance___6", # carbapenems
    "etiology_gn_res_blbli___1", # amoxicillin/clavulanate
    "etiology_gn_res_blbli___2", # piperacillin/tazobactam
    "etiology_gn_res_cr___1", # KPC
    "etiology_gn_res_cr___2", # OXA-48
    "etiology_gn_res_cr___3", # VIM
    "etiology_gn_res_cr___4", # NDM
    "etiology_gn_res_cr___5", # other
    "etiology_gp___1", # Staphylococcus aureus
    "etiology_gp___2", # Coagulase-negative Staphylococci
    "etiology_gp___3", # Streptococcus spp
    "etiology_gp___4", # Enterococcus faecalis
    "etiology_gp___5", # Enterococcus faecium
    "etiology_gp___6", # other
    "candida_type___1", # C. albicans
    "candida_type___2", # C. glabrata
    "candida_type___3", # C. parapsilosis
    "candida_type___4", # C. krusei
    "candida_type___5", # C. norvegensis
    "candida_type___6", # C. dublinensis
    "candida_type___7", # C. tropicalis
    "candida_type___8", # other Candida spp.
    "inf_other___1", # CMV
    "cmv_inf_dis", # dis vs inf
    "cmv_dis___1", # CMV syndrome
    "cmv_dis___2", # CMV GI disease
    "cmv_dis___3", # CMV pneumonia/bronchiolitis
    "cmv_dis___4", # CMV hepatitis
    "cmv_dis___5", # CMV retinitis
    "cmv_dis___6", # CMV encephalitis
    "cmv_dis___7", # other
    "inf_other___2", # C. diff
    "cdiff_micro___1", # toxin
    "cdiff_micro___2", # NAAT
    "cdiff_type___1", # first event
    "cdiff_type___2", # recurrence
    "cdiff_type___3", # relapse
  ) %>%
  mutate(
    etiology_enterob___6 = case_when(
      etiology_gn_other == "Morganella morganii" ~ 1,
      .default = etiology_enterob___6,
    ),
    etiology_gn___5 = case_when(
      etiology_gn_other == "Morganella morganii" ~ 0,
      .default = etiology_gn___5,
    ),
    cre_colonisation_pre = structure(
      factor(cre_colonisation_pre, levels = c(0, 1), labels = c("No", "Yes")),
      label = "CRE colonisation"
    ),
    cre_colonisation_mechanism___1 = structure(
      factor(cre_colonisation_mechanism___1, levels = c(0, 1), labels = c("No", "Yes")),
      label = "KPC"
    ),
    cre_colonisation_mechanism___2 = structure(
      factor(cre_colonisation_mechanism___2, levels = c(0, 1), labels = c("No", "Yes")),
      label = "OXA-48"
    ),
    cre_colonisation_mechanism___3 = structure(
      factor(cre_colonisation_mechanism___3, levels = c(0, 1), labels = c("No", "Yes")),
      label = "VIM"
    ),
    cre_colonisation_mechanism___4 = structure(
      factor(cre_colonisation_mechanism___4, levels = c(0, 1), labels = c("No", "Yes")),
      label = "NDM"
    ),
    cre_colonisation_mechanism___5 = structure(
      factor(cre_colonisation_mechanism___5, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Other"
    ),
    micro_yn = structure(
      factor(micro_yn, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Microbiological tests performed"
    ),
    etiology_yn = structure(
      factor(etiology_yn, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Etiology identified"
    ),
    etiology_type___1 = structure(
      factor(etiology_type___1, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Gram -"
    ),
    etiology_type___2 = structure(
      factor(etiology_type___2, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Gram +"
    ),
    etiology_type___3 = structure(
      factor(etiology_type___3, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Fungi"
    ),
    etiology_type___4 = structure(
      factor(etiology_type___4, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Other"
    ),
    etiology_gn___1 = structure(
      factor(etiology_gn___1, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Enterobacteriacea"
    ),
    etiology_gn___2 = structure(
      factor(etiology_gn___2, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Pseudomonas spp"
    ),
    etiology_gn___3 = structure(
      factor(etiology_gn___3, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Acinetobacter spp"
    ),
    etiology_gn___4 = structure(
      factor(etiology_gn___4, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Stenotrophomonas spp"
    ),
    etiology_gn___5 = structure(
      factor(etiology_gn___5, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Other"
    ),
    etiology_enterob___1 = structure(
      factor(etiology_enterob___1, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Escherichia coli"
    ),
    etiology_enterob___2 = structure(
      factor(etiology_enterob___2, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Klebsiella spp"
    ),
    etiology_enterob___3 = structure(
      factor(etiology_enterob___3, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Enterobacter spp"
    ),
    etiology_enterob___4 = structure(
      factor(etiology_enterob___4, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Citrobacter spp"
    ),
    etiology_enterob___5 = structure(
      factor(etiology_enterob___5, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Serratia spp"
    ),
    etiology_enterob___6 = structure(
      factor(etiology_enterob___6, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Morganella spp"
    ),
    etiology_enterob___7 = structure(
      factor(etiology_enterob___7, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Proteus spp"
    ),
    etiology_enterob___8 = structure(
      factor(etiology_enterob___8, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Salmonella spp"
    ),
    etiology_enterob___9 = structure(
      factor(etiology_enterob___9, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Other"
    ),
    etiology_gp___1 = structure(
      factor(etiology_gp___1, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Staphylococcus aureus"
    ),
    etiology_gp___2 = structure(
      factor(etiology_gp___2, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Coagulase-negative Staphylococci"
    ),
    etiology_gp___3 = structure(
      factor(etiology_gp___3, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Streptococcus spp"
    ),
    etiology_gp___4 = structure(
      factor(etiology_gp___4, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Enterococcus faecalis"
    ),
    etiology_gp___5 = structure(
      factor(etiology_gp___5, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Enterococcus faecium"
    ),
    etiology_gp___6 = structure(
      factor(etiology_gp___6, levels = c(0, 1), labels = c("No", "Yes")),
      label = "Other"
    ),
  )

micro_cre <- df_micro %>%
  select(starts_with("cre_colonisation_")) %>%
  mutate(
    cre_col_mech = case_when(
      cre_colonisation_mechanism___1 == "Yes" ~ "KPC",
      cre_colonisation_mechanism___2 == "Yes" ~ "OXA-48",
      cre_colonisation_mechanism___3 == "Yes" ~ "VIM",
      cre_colonisation_mechanism___4 == "Yes" ~ "NDM",
      cre_colonisation_mechanism___5 == "Yes" ~ "Other",
      .default = "Not colonised",
    ),
    cre_col_mech = structure(factor(cre_col_mech, levels = c("Not colonised", "KPC", "OXA-48", "VIM", "NDM", "Other")),
      label = "CRE colonisation"
    )
  ) %>%
  select(cre_col_mech) %>%
  gtsummary::tbl_summary(
    data = .,
    missing_text = "Missing"
  ) %>%
  add_n() %>%
  modify_header(stat_0 = "**n (%)**") %>%
  modify_footnote(all_stat_cols() ~ NA)

micro_mic <- df_micro %>%
  filter(po_inf_30d == "Yes") %>%
  tbl_summary(
    data = .,
    include = c("micro_yn"),
    missing_text = "Missing",
  ) %>%
  add_n() %>%
  modify_header(stat_0 = "**n (%)**")

micro_eti <- df_micro %>%
  filter(po_inf_30d == "Yes" & micro_yn == "Yes") %>%
  tbl_summary(
    data = .,
    include = c("etiology_yn"),
    missing_text = "Missing",
  ) %>%
  add_n() %>%
  modify_header(stat_0 = "**n (%)**")

micro_type <- df_micro %>%
  filter(po_inf_30d == "Yes" & etiology_yn == "Yes") %>%
  select(starts_with("etiology_type___")) %>%
  mutate(
    etiology_poly = structure(
      factor(case_when(
        rowSums(across(starts_with("etiology_type___"), ~ ifelse(.x == "Yes", 1, 0))) > 1 ~ "Polymicrobial",
        rowSums(across(starts_with("etiology_type___"), ~ ifelse(.x == "Yes", 1, 0))) == 1 ~ "Monomicrobial",
      ), levels = c("Monomicrobial", "Polymicrobial")),
      label = "Polymicrobial infection"
    ),
    etiology_type.factor = factor(
      paste(
        ifelse(etiology_type___1 == "Yes", "Gram -", NA_character_),
        ifelse(etiology_type___2 == "Yes", "Gram +", NA_character_),
        ifelse(etiology_type___3 == "Yes", "Fungi", NA_character_),
        ifelse(etiology_type___4 == "Yes", "Other", NA_character_),
        sep = " & "
      ),
    ),
    etiology_type.factor = structure(glue::trim(gsub(" \\& NA|NA \\& ", "", etiology_type.factor)), label = "Etiology"),
  ) %>%
  relocate(etiology_poly, .before = "etiology_type___1") %>%
  tbl_summary(data = ., include = !starts_with("etiology_type___")) %>%
  #> bstfun::add_variable_grouping(
  #>   "Etiology" = c("etiology_type___1", "etiology_type___2", "etiology_type___3", "etiology_type___4")
  #> ) %>%
  add_n() %>%
  modify_header(stat_0 = "**n (%)**")

micro_path_gn <- df_micro %>%
  filter(po_inf_30d == "Yes" & etiology_yn == "Yes" & micro_yn == "Yes") %>%
  relocate(starts_with("etiology_enterob___"), .after = "etiology_gn___1") %>%
  select(
    starts_with("etiology_enterob___"),
    starts_with("etiology_gn___"),
    -etiology_gn___1,
    -etiology_enterob___9,
    #> starts_with("candida_type___")
  ) %>%
  tbl_summary(data = .) %>%
  add_n() %>%
  modify_header(stat_0 = "**n (%)**") %>%
  modify_table_body(
    fun = ~ .x %>%
      mutate(
        stat = as.integer(gsub("\\(.*\\%\\)", "", stat_0))
      ) %>%
      arrange(
        match(variable, c("Gram negative")),
        ifelse(match(variable, c("etiology_enterob___")), desc(stat), match(variable, c("etiology_gn___")))
      )
  ) |>
  gtsummary::modify_table_body(
    ~ .x |> tibble::add_row(variable = "grouping", label = "Gram negative", .before = 1L)
  ) |>
  modify_column_indent(columns = "label", rows = !(label == "Gram negative"), indent = 4L) |>
  modify_table_body(
    ~ .x |>
      dplyr::mutate(
        n = dplyr::case_when(
          label == "Gram negative" ~ as.character(max(as.integer(.x$n), na.rm = TRUE)),
          grepl("etiology_gn___", variable) ~ NA,
          grepl("etiology_enterob___", variable) ~ NA,
          .default = n
        )
      )
  )

micro_path_gp <- df_micro %>%
  dplyr::filter(po_inf_30d == "Yes" & etiology_yn == "Yes" & micro_yn == "Yes") %>%
  dplyr::select(
    starts_with("etiology_gp___"),
  ) %>%
  gtsummary::tbl_summary(data = .) %>%
  gtsummary::add_n() %>%
  gtsummary::modify_header(stat_0 = "**n (%)**") %>%
  # add a row for the grouping
  gtsummary::modify_table_body(
    ~ .x |> tibble::add_row(label = "Gram positive", .before = 1L)
  ) |>
  modify_column_indent(columns = "label", rows = !(label == "Gram positive"), indent = 4L) |>
  # finally, remove Ns for the subgroups, and add an N to the grouping level
  modify_table_body(
    ~ .x |>
      dplyr::mutate(
        n = dplyr::case_when(
          label == "Gram positive" ~ as.character(max(as.integer(.x$n), na.rm = TRUE)),
          grepl("etiology_gp___", variable) ~ NA,
          .default = n
        )
      )
  )

tbl_stack(list(
  micro_mic,
  micro_eti,
  micro_type,
  micro_path_gn,
  micro_path_gp
)) %>%
  modify_footnote(all_stat_cols() ~ NA)
Characteristic N n (%)
Microbiological tests performed 94 40 (43%)
    Missing
1
Etiology identified 40 17 (43%)
Polymicrobial infection 17
    Monomicrobial
12 (71%)
    Polymicrobial
5 (29%)
Etiology 17
    Gram -
7 (41%)
    Gram - & Gram +
5 (29%)
    Gram +
3 (18%)
    Other
2 (12%)
Gram negative 17
    Escherichia coli
6 (35%)
    Klebsiella spp
2 (12%)
    Enterobacter spp
4 (24%)
    Citrobacter spp
1 (5.9%)
    Serratia spp
0 (0%)
    Morganella spp
1 (5.9%)
    Proteus spp
1 (5.9%)
    Salmonella spp
0 (0%)
    Pseudomonas spp
0 (0%)
    Acinetobacter spp
0 (0%)
    Stenotrophomonas spp
0 (0%)
    Other
0 (0%)
Gram positive 17
    Staphylococcus aureus
1 (5.9%)
    Coagulase-negative Staphylococci
3 (18%)
    Streptococcus spp
0 (0%)
    Enterococcus faecalis
2 (12%)
    Enterococcus faecium
3 (18%)
    Other
1 (5.9%)

The End

That’s all folks.

Acknowledgments

Thanks to the cool guys at https://stackoverflow.com/ for helping with whatever I could not do.

References

[1]
Wickham H. ggplot2: Elegant graphics for data analysis. Springer-Verlag New York; 2016.
[2]
[3]
[4]
Sjoberg DD, Whiting K, Curry M, Lavery JA, Larmarange J. Reproducible summary tables with the gtsummary package. The R Journal 2021;13:570–80. https://doi.org/10.32614/RJ-2021-053.
[5]
[6]
Heinzen E, Sinnwell J, Atkinson E, Gunderson T, Dougherty G. Arsenal: An arsenal of ’r’ functions for large-scale statistical summaries. 2021.
[7]
Buuren S van, Groothuis-Oudshoorn K. Mice: Multivariate imputation by chained equations inR. Journal of Statistical Software 2011;45. https://doi.org/10.18637/jss.v045.i03.

Nerd stats

Code
devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.4.2 (2024-10-31)
 os       macOS Sonoma 14.6.1
 system   x86_64, darwin23.6.0
 ui       unknown
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Rome
 date     2025-01-03
 pandoc   3.6.1 @ /usr/local/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package           * version  date (UTC) lib source
 abind               1.4-8    2024-09-12 [1] CRAN (R 4.4.2)
 arsenal             3.6.3    2021-06-04 [1] CRAN (R 4.4.2)
 backports           1.5.0    2024-05-23 [1] CRAN (R 4.4.2)
 base64enc           0.1-3    2015-07-28 [1] CRAN (R 4.4.2)
 bitops              1.0-9    2024-10-03 [1] CRAN (R 4.4.2)
 boot                1.3-31   2024-08-28 [3] CRAN (R 4.4.2)
 broom               1.0.7    2024-09-26 [1] CRAN (R 4.4.2)
 broom.helpers       1.17.0   2024-08-28 [1] CRAN (R 4.4.2)
 cachem              1.1.0    2024-05-16 [1] CRAN (R 4.4.2)
 car                 3.1-3    2024-09-27 [1] CRAN (R 4.4.2)
 carData             3.0-5    2022-01-06 [1] CRAN (R 4.4.2)
 cards               0.4.0    2024-11-27 [1] CRAN (R 4.4.2)
 cardx               0.2.2    2024-11-27 [1] CRAN (R 4.4.2)
 caTools             1.18.3   2024-09-04 [1] CRAN (R 4.4.2)
 checkmate           2.3.2    2024-07-29 [1] CRAN (R 4.4.2)
 cli                 3.6.3    2024-06-21 [1] CRAN (R 4.4.2)
 cluster             2.1.6    2023-12-01 [3] CRAN (R 4.4.2)
 codetools           0.2-20   2024-03-31 [3] CRAN (R 4.4.2)
 colorspace          2.1-1    2024-07-26 [1] CRAN (R 4.4.2)
 commonmark          1.9.2    2024-10-04 [1] CRAN (R 4.4.2)
 cowplot             1.1.3    2024-01-22 [1] CRAN (R 4.4.2)
 curl                6.0.1    2024-11-14 [1] CRAN (R 4.4.2)
 data.table          1.16.2   2024-10-10 [1] CRAN (R 4.4.2)
 DEoptimR            1.1-3-1  2024-11-23 [1] CRAN (R 4.4.2)
 devtools          * 2.4.5    2022-10-11 [1] CRAN (R 4.4.2)
 digest              0.6.37   2024-08-19 [1] CRAN (R 4.4.2)
 doParallel          1.0.17   2022-02-07 [1] CRAN (R 4.4.2)
 dplyr             * 1.1.4    2023-11-17 [1] CRAN (R 4.4.2)
 ellipsis            0.3.2    2021-04-29 [1] CRAN (R 4.4.2)
 evaluate            1.0.1    2024-10-10 [1] CRAN (R 4.4.2)
 fansi               1.0.6    2023-12-08 [1] CRAN (R 4.4.2)
 farver              2.1.2    2024-05-13 [1] CRAN (R 4.4.2)
 fastmap             1.2.0    2024-05-15 [1] CRAN (R 4.4.2)
 forcats           * 1.0.0    2023-01-29 [1] CRAN (R 4.4.2)
 foreach             1.5.2    2022-02-02 [1] CRAN (R 4.4.2)
 foreign             0.8-87   2024-06-26 [3] CRAN (R 4.4.2)
 Formula             1.2-5    2023-02-24 [1] CRAN (R 4.4.2)
 fs                  1.6.5    2024-10-30 [1] CRAN (R 4.4.2)
 generics            0.1.3    2022-07-05 [1] CRAN (R 4.4.2)
 ggnewscale          0.5.0    2024-07-19 [1] CRAN (R 4.4.2)
 ggplot2           * 3.5.1    2024-04-23 [1] CRAN (R 4.4.2)
 ggpubr            * 0.6.0    2023-02-10 [1] CRAN (R 4.4.2)
 ggsignif            0.6.4    2022-10-13 [1] CRAN (R 4.4.2)
 glmnet              4.1-8    2023-08-22 [1] CRAN (R 4.4.2)
 glue                1.8.0    2024-09-30 [1] CRAN (R 4.4.2)
 gridExtra           2.3      2017-09-09 [1] CRAN (R 4.4.2)
 gt                  0.11.1   2024-10-04 [1] CRAN (R 4.4.2)
 gtable              0.3.6    2024-10-25 [1] CRAN (R 4.4.2)
 gtsummary         * 2.0.4    2024-11-30 [1] CRAN (R 4.4.2)
 haven             * 2.5.4    2023-11-30 [1] CRAN (R 4.4.2)
 Hmisc             * 5.2-1    2024-12-02 [1] CRAN (R 4.4.2)
 hms                 1.1.3    2023-03-21 [1] CRAN (R 4.4.2)
 htmlTable           2.4.3    2024-07-21 [1] CRAN (R 4.4.2)
 htmltools           0.5.8.1  2024-04-04 [1] CRAN (R 4.4.2)
 htmlwidgets         1.6.4    2023-12-06 [1] CRAN (R 4.4.2)
 httpuv              1.6.15   2024-03-26 [1] CRAN (R 4.4.2)
 iterators           1.0.14   2022-02-05 [1] CRAN (R 4.4.2)
 jomo                2.7-6    2023-04-15 [1] CRAN (R 4.4.2)
 jsonlite            1.8.9    2024-09-20 [1] CRAN (R 4.4.2)
 km.ci               0.5-6    2022-04-06 [1] CRAN (R 4.4.2)
 KMsurv              0.1-5    2012-12-03 [1] CRAN (R 4.4.2)
 knitr               1.49     2024-11-08 [1] CRAN (R 4.4.2)
 labeling            0.4.3    2023-08-29 [1] CRAN (R 4.4.2)
 labelled            2.13.0   2024-04-23 [1] CRAN (R 4.4.2)
 later               1.3.2    2023-12-06 [1] CRAN (R 4.4.2)
 lattice             0.22-6   2024-03-20 [3] CRAN (R 4.4.2)
 lifecycle           1.0.4    2023-11-07 [1] CRAN (R 4.4.2)
 lme4                1.1-35.5 2024-07-03 [1] CRAN (R 4.4.2)
 lubridate         * 1.9.3    2023-09-27 [1] CRAN (R 4.4.2)
 magrittr            2.0.3    2022-03-30 [1] CRAN (R 4.4.2)
 markdown            1.13     2024-06-04 [1] CRAN (R 4.4.2)
 MASS                7.3-61   2024-06-13 [3] CRAN (R 4.4.2)
 Matrix              1.7-1    2024-10-18 [3] CRAN (R 4.4.2)
 memoise             2.0.1    2021-11-26 [1] CRAN (R 4.4.2)
 mgcv                1.9-1    2023-12-21 [3] CRAN (R 4.4.2)
 mice                3.17.0   2024-11-27 [1] CRAN (R 4.4.2)
 mime                0.12     2021-09-28 [1] CRAN (R 4.4.2)
 miniUI              0.1.1.1  2018-05-18 [1] CRAN (R 4.4.2)
 minqa               1.2.8    2024-08-17 [1] CRAN (R 4.4.2)
 mitml               0.4-5    2023-03-08 [1] CRAN (R 4.4.2)
 munsell             0.5.1    2024-04-01 [1] CRAN (R 4.4.2)
 nlme                3.1-166  2024-08-14 [3] CRAN (R 4.4.2)
 nloptr              2.1.1    2024-06-25 [1] CRAN (R 4.4.2)
 nnet                7.3-19   2023-05-03 [3] CRAN (R 4.4.2)
 opdisDownsampling   1.0.1    2024-04-15 [1] CRAN (R 4.4.2)
 pacman            * 0.5.1    2019-03-11 [1] CRAN (R 4.4.2)
 pan                 1.9      2023-12-07 [1] CRAN (R 4.4.2)
 pbmcapply           1.5.1    2022-04-28 [1] CRAN (R 4.4.2)
 pillar              1.9.0    2023-03-22 [1] CRAN (R 4.4.2)
 pkgbuild            1.4.5    2024-10-28 [1] CRAN (R 4.4.2)
 pkgconfig           2.0.3    2019-09-22 [1] CRAN (R 4.4.2)
 pkgload             1.4.0    2024-06-28 [1] CRAN (R 4.4.2)
 pracma              2.4.4    2023-11-10 [1] CRAN (R 4.4.2)
 profvis             0.4.0    2024-09-20 [1] CRAN (R 4.4.2)
 promises            1.3.0    2024-04-05 [1] CRAN (R 4.4.2)
 purrr             * 1.0.2    2023-08-10 [1] CRAN (R 4.4.2)
 qqconf              1.3.2    2023-04-14 [1] CRAN (R 4.4.2)
 qqplotr             0.0.6    2023-01-25 [1] CRAN (R 4.4.2)
 R.cache             0.16.0   2022-07-21 [1] CRAN (R 4.4.2)
 R.methodsS3         1.8.2    2022-06-13 [1] CRAN (R 4.4.2)
 R.oo                1.27.0   2024-11-01 [1] CRAN (R 4.4.2)
 R.utils             2.12.3   2023-11-18 [1] CRAN (R 4.4.2)
 R6                  2.5.1    2021-08-19 [1] CRAN (R 4.4.2)
 Rcpp                1.0.13-1 2024-11-02 [1] CRAN (R 4.4.2)
 readr             * 2.1.5    2024-01-10 [1] CRAN (R 4.4.2)
 remotes             2.5.0    2024-03-17 [1] CRAN (R 4.4.2)
 rlang               1.1.4    2024-06-04 [1] CRAN (R 4.4.2)
 rmarkdown           2.29     2024-11-04 [1] CRAN (R 4.4.2)
 robustbase          0.99-4-1 2024-09-27 [1] CRAN (R 4.4.2)
 rpart               4.1.23   2023-12-05 [3] CRAN (R 4.4.2)
 rstatix             0.7.2    2023-02-01 [1] CRAN (R 4.4.2)
 rstudioapi          0.17.1   2024-10-22 [1] CRAN (R 4.4.2)
 sass                0.4.9    2024-03-15 [1] CRAN (R 4.4.2)
 scales              1.3.0    2023-11-28 [1] CRAN (R 4.4.2)
 sessioninfo         1.2.2    2021-12-06 [1] CRAN (R 4.4.2)
 shape               1.4.6.1  2024-02-23 [1] CRAN (R 4.4.2)
 shiny               1.9.1    2024-08-01 [1] CRAN (R 4.4.2)
 stringi             1.8.4    2024-05-06 [1] CRAN (R 4.4.2)
 stringr           * 1.5.1    2023-11-14 [1] CRAN (R 4.4.2)
 styler              1.10.3   2024-04-07 [1] CRAN (R 4.4.2)
 survival            3.7-0    2024-06-05 [3] CRAN (R 4.4.2)
 survminer         * 0.5.0    2024-10-30 [1] CRAN (R 4.4.2)
 survMisc            0.5.6    2022-04-07 [1] CRAN (R 4.4.2)
 tibble            * 3.2.1    2023-03-20 [1] CRAN (R 4.4.2)
 tidyr             * 1.3.1    2024-01-24 [1] CRAN (R 4.4.2)
 tidyselect          1.2.1    2024-03-11 [1] CRAN (R 4.4.2)
 tidyverse         * 2.0.0    2023-02-22 [1] CRAN (R 4.4.2)
 timechange          0.3.0    2024-01-18 [1] CRAN (R 4.4.2)
 twosamples          2.0.1    2023-06-23 [1] CRAN (R 4.4.2)
 tzdb                0.4.0    2023-05-12 [1] CRAN (R 4.4.2)
 urlchecker          1.0.1    2021-11-30 [1] CRAN (R 4.4.2)
 usethis           * 3.0.0    2024-07-29 [1] CRAN (R 4.4.2)
 utf8                1.2.4    2023-10-22 [1] CRAN (R 4.4.2)
 vctrs               0.6.5    2023-12-01 [1] CRAN (R 4.4.2)
 withr               3.0.2    2024-10-28 [1] CRAN (R 4.4.2)
 xfun                0.49     2024-10-31 [1] CRAN (R 4.4.2)
 xml2                1.3.6    2023-12-04 [1] CRAN (R 4.4.2)
 xtable              1.8-4    2019-04-21 [1] CRAN (R 4.4.2)
 yaml                2.3.10   2024-07-26 [1] CRAN (R 4.4.2)
 zoo                 1.8-12   2023-04-13 [1] CRAN (R 4.4.2)

 [1] /Users/devster/Library/R/x86_64/4.4/library
 [2] /usr/local/lib/R/4.4/site-library
 [3] /usr/local/Cellar/r/4.4.2_2/lib/R/library

──────────────────────────────────────────────────────────────────────────────