Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
799ca48
Added a function that returns a hardcoded list with the correct order…
quazi-h Dec 6, 2021
16013ce
Added a new extraction pipeline for AFUS and updated some of the filt…
quazi-h Dec 7, 2021
99391a4
Added the afus_owner schema to the project
quazi-h Dec 8, 2021
7001a74
Updates to AFUS extraction, updating tests.
quazi-h Dec 9, 2021
7a4ed52
Added afus_owner to output from tsv script.
quazi-h Dec 13, 2021
3e6547b
Changes to AFUS Extraction, TESTING
quazi-h Dec 13, 2021
e53ac45
Adding "baseline_arm_1" arm to list of RedCap events to pull from.
quazi-h Dec 14, 2021
c9e167a
New AfusTransformation pipeline added, logic to process the afus_owne…
quazi-h Dec 14, 2021
9baa062
Trimming whitespace from fields that are to be stripped.
quazi-h Dec 22, 2021
827ca0c
"Using the correct method to pull owner_id, no longer need to cast th…
quazi-h Dec 22, 2021
3645a23
Removing extraction changes from this branch (branched from extractio…
quazi-h Dec 22, 2021
91a3e1d
Merge branch 'master' into qh-dspdc-1958-afus-owner-transformations
quazi-h Dec 22, 2021
603184c
Merge branch 'master' into qh-force-tsv-column-order
quazi-h Dec 22, 2021
ed20d9a
Added forms needed for afus_dog extraction.
quazi-h Dec 22, 2021
326b360
Remaining fragments and transformation scripts for afus dog tables.
quazi-h Jan 19, 2022
a690954
Decreased extract batch size to 1, using a batch size of 5-10 caused …
quazi-h Jan 19, 2022
f103a76
Removed an unnecessary link in afus_owner table.
quazi-h Jan 19, 2022
1bcb9c4
Merge branch 'qh-dspdc-1958-afus-owner-transformations' into qh-dspdc…
quazi-h Jan 19, 2022
1a6cd1b
Removed unused import
quazi-h Jan 19, 2022
129c527
Merge branch 'qh-dspdc-1958-afus-owner-transformations' into qh-dspdc…
quazi-h Jan 19, 2022
0504a3c
Merge branch 'master' into qh-dspdc-1958-afus-dog
quazi-h Jan 20, 2022
f7c92d8
Merge branch 'master' into qh-dspdc-1958-afus-dog
quazi-h Jan 20, 2022
0b58311
Merge branch 'qh-dspdc-1958-afus-dog' into qh-dspdc-1958-afus-dog-merged
quazi-h Jan 20, 2022
7a58741
cancer and health table schemas
quazi-h Jan 20, 2022
01cb320
Some changes to address issues that came up when reviewing the data
quazi-h Jan 20, 2022
8614d8e
Merge remote-tracking branch 'origin/qh-dspdc-1958-health-cancer-tran…
quazi-h Jan 21, 2022
6bb28fb
additional requested changes and filter conditions
quazi-h Feb 8, 2022
f8a30aa
missing files
quazi-h Feb 8, 2022
6b4a9ce
Filter using predicate
aherbst-broad Feb 8, 2022
397d489
Issues uncovered during afus TSV review
quazi-h Feb 10, 2022
d995c8b
Removing fu_completion requirement on afus extraction - will filter a…
quazi-h Mar 2, 2022
3bd3662
Merge branch 'hles-filter-update' into qh-dspdc-1958-afus-dog-merged
quazi-h Mar 2, 2022
d4bc932
Merge branch 'qh-dspdc-1958-afus-dog' into qh-dspdc-1958-afus-dog-merged
quazi-h Mar 2, 2022
6183c86
Merge branch 'qh-force-tsv-column-order' into qh-dspdc-1958-afus-dog-…
quazi-h Mar 2, 2022
14f65a8
Issues uncovered during afus TSV review
quazi-h Jul 21, 2022
b52d138
Testing bug.
quazi-h Aug 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,39 @@ object AfusExtractionPipeline extends ScioApp[Args] {
"followup_status",
"followup_owner_contact",
"study_status",
"followup_owner_demographics"
"followup_owner_demographics",
"followup_dog_demographics",
"followup_physical_activity",
"followup_environment",
"followup_behavior",
"followup_diet",
"followup_meds_and_preventives",
"followup_canine_eating_behavior_dora",
"followup_dogowner_relationship_survey_mdors",
"followup_health_status",
"followup_health_status_part_1",
"followup_health_status_part_2",
"followup_health_status_part_3"
)

def extractionFiltersGenerator(args: Args): List[FilterDirective] = {
// FORM = followup_status
val standardDirectives: List[FilterDirective] = List(
FilterDirective("fu_is_completed", FilterOps.==, "1")
// DAP Pack filters
FilterDirective("st_dap_pack_count", FilterOps.>, "0"),
FilterDirective("st_vip_or_staff", FilterOps.==, "0")
)
val dateFilters: List[FilterDirective] = {
args.startTime
.map(start =>
args.endTime
.map(end =>
List(
FilterDirective(
"fu_complete_date",
FilterOps.>,
RedCapClient.redcapFormatDate(start)
FilterOps.<,
RedCapClient.redcapFormatDate(end)
)
)
)
.getOrElse(List()) ++
args.endTime
.map(end =>
List(
FilterDirective(
"fu_complete_date",
FilterOps.<,
RedCapClient.redcapFormatDate(end)
)
)
)
.getOrElse(List())
.getOrElse(List())
}

standardDirectives ++ dateFilters
Expand Down Expand Up @@ -79,9 +81,16 @@ object AfusExtractionPipeline extends ScioApp[Args] {
// afus has one arm per year ("fup_{sequence}_arm_1")
val yearList = getYearList(startDate, endDate)
val yearSeqList = List.range(1, yearList.length + 1)
yearSeqList.map { seq =>
s"fup_${seq}_arm_1"
} ++ List("baseline_arm_1")
// yearSeqList.map { seq =>
// s"fup_${seq}_arm_1"
// } ++ List("baseline_arm_1")
if (1 == 1) {
List("fup_1_arm_1", "baseline_arm_1")
} else {
yearSeqList.map { seq =>
s"fup_${seq}_arm_1"
} ++ List("baseline_arm_1")
}
}

def buildPipelineWithWrapper(wrapper: HttpWrapper): PipelineBuilder[Args] =
Expand All @@ -91,7 +100,7 @@ object AfusExtractionPipeline extends ScioApp[Args] {
extractionArmsGenerator,
fieldList,
subdir,
10,
1,
RedCapClient.apply(_: List[String], wrapper)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,25 @@ object CslbExtractionPipeline extends ScioApp[Args] {

val forms = List(
"recruitment_fields",
"canine_social_and_learned_behavior"
"canine_social_and_learned_behavior",
"study_status"
)

// Magic marker for "completed".
// NB: We are purposefully excluding the recruitment_fields_complete -> 2
// mapping, as that conflicts with the CSLB data
def extractionFiltersGenerator(args: Args): List[FilterDirective] =
List(
FilterDirective("canine_social_and_learned_behavior_complete", FilterOps.==, "2")
) ++
args.startTime
.map(start =>
List(FilterDirective("cslb_date", FilterOps.>, RedCapClient.redcapFormatDate(start)))
)
.getOrElse(List()) ++
def extractionFiltersGenerator(args: Args): List[FilterDirective] = {
val standardDirectives: List[FilterDirective] = List(
FilterDirective("canine_social_and_learned_behavior_complete", FilterOps.==, "2"),
FilterDirective("st_vip_or_staff", FilterOps.==, "0")
)
val dateFilters: List[FilterDirective] = {
args.endTime
.map(end => List(FilterDirective("cslb_date", FilterOps.<, RedCapClient.redcapFormatDate(end))))
.getOrElse(List())
}
standardDirectives ++ dateFilters
}

val subdir = "cslb"

Expand All @@ -57,10 +58,10 @@ object CslbExtractionPipeline extends ScioApp[Args] {
// cslb has one arm per year ("annual_{yyyy}_arm_1")
getYearList(startDate, endDate).map { date =>
s"annual_${date}_arm_1"
}
} ++ List("baseline_arm_1")
}

val fieldList = List("co_consent")
val fieldList = List("co_consent", "st_vip_or_staff")

def buildPipelineWithWrapper(wrapper: HttpWrapper): PipelineBuilder[Args] =
new ExtractionPipelineBuilder(
Expand All @@ -69,7 +70,7 @@ object CslbExtractionPipeline extends ScioApp[Args] {
extractionArmsGenerator,
fieldList,
subdir,
1000,
10,
RedCapClient.apply(_: List[String], wrapper)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,24 @@ object EnvironmentExtractionPipeline extends ScioApp[Args] {

// Magic marker for "completed".
// NB: We are looking for baseline_complete -> 2
def extractionFiltersGenerator(args: Args): List[FilterDirective] =
List(FilterDirective("baseline_complete", FilterOps.==, "2")) ++
args.startTime
.map(start =>
List(FilterDirective("bl_dap_pack_date", FilterOps.>, RedCapClient.redcapFormatDate(start)))
)
.getOrElse(List()) ++
args.endTime
.map(end =>
List(FilterDirective("bl_dap_pack_date", FilterOps.<, RedCapClient.redcapFormatDate(end)))
)
.getOrElse(List())
def extractionFiltersGenerator(args: Args): List[FilterDirective] = {
val dateFilters: List[FilterDirective] = {
List(FilterDirective("baseline_complete", FilterOps.==, "2")) ++
args.startTime
.map(start =>
List(
FilterDirective("bl_dap_pack_date", FilterOps.>, RedCapClient.redcapFormatDate(start))
)
)
.getOrElse(List()) ++
args.endTime
.map(end =>
List(FilterDirective("bl_dap_pack_date", FilterOps.<, RedCapClient.redcapFormatDate(end)))
)
.getOrElse(List())
}
dateFilters
}

val subdir = "environment"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,19 @@ object EolsExtractionPipeline extends ScioApp[Args] {
.map(form => FilterDirective(s"${form}_complete", FilterOps.==, "2"))
// EOLS consent
val standardDirectives: List[FilterDirective] = List(
FilterDirective("eol_willing_to_complete", FilterOps.==, "1")
FilterDirective("eol_willing_to_complete", FilterOps.==, "1"),
// DAP Pack filters
FilterDirective("st_vip_or_staff", FilterOps.==, "0")
)
val dateFilters: List[FilterDirective] =
args.startTime
.map(start =>
val dateFilters: List[FilterDirective] = {
args.endTime
.map(end =>
List(
FilterDirective("eol_date_dog_died", FilterOps.>, RedCapClient.redcapFormatDate(start))
FilterDirective("eol_date_dog_died", FilterOps.<, RedCapClient.redcapFormatDate(end))
)
)
.getOrElse(List()) ++
args.endTime
.map(end =>
List(
FilterDirective("eol_date_dog_died", FilterOps.<, RedCapClient.redcapFormatDate(end))
)
)
.getOrElse(List())
.getOrElse(List())
}

completionFilters ++ standardDirectives ++ dateFilters
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@ object HLESExtractionPipeline extends ScioApp[Args] {
def extractionFiltersGenerator(args: Args): List[FilterDirective] = {
val standardDirectives: List[FilterDirective] = List(
FilterDirective("st_dap_pack_count", FilterOps.>, "0"),
FilterDirective(
"st_dap_pack_date",
FilterOps.>,
RedCapClient.redcapFormatDate(args.startTime.getOrElse(HLESEpoch))
)
FilterDirective("st_vip_or_staff", FilterOps.==, "0")
)
val endFilter: List[FilterDirective] =
args.endTime
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,22 @@ object SampleExtractionPipeline extends ScioApp[Args] {
"recruitment_fields"
)

def extractionFiltersGenerator(args: Args): List[FilterDirective] =
args.startTime
.map(start =>
List(
FilterDirective("k1_rtn_tracking_date", FilterOps.>, RedCapClient.redcapFormatDate(start))
)
)
.getOrElse(List()) ++
def extractionFiltersGenerator(args: Args): List[FilterDirective] = {
val standardDirectives: List[FilterDirective] = List(
// DAP Pack filters
FilterDirective("st_dap_pack_count", FilterOps.>, "0")
)
val dateFilters: List[FilterDirective] = {
args.endTime
.map(end =>
List(
FilterDirective("k1_rtn_tracking_date", FilterOps.<, RedCapClient.redcapFormatDate(end))
)
)
.getOrElse(List())
}
standardDirectives ++ dateFilters
}

val subdir = "sample";
val arm = "baseline_arm_1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class AfusExtractionPipelineBuilderIntegrationSpec extends PipelineBuilderSpec[A
testArgs.apiToken,
arms,
GetRecords(
fields = List("st_owner_id"),
fields = List("study_id"),
filters = AfusExtractionPipeline.extractionFiltersGenerator(testArgs)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,19 @@ class AfusExtractionPipelineSpec extends AnyFlatSpec with Matchers {

val afusArms = AfusExtractionPipeline.extractionArmsGenerator(Some(startTime), Some(endTime))

afusArms shouldBe List("fup_1_arm_1", "fup_2_arm_1", "fup_3_arm_1", "baseline_arm_1")
// todo: update when extraction arm generator logic is updated
afusArms shouldBe List("fup_1_arm_1", "baseline_arm_1")
//afusArms shouldBe List("fup_1_arm_1", "fup_2_arm_1", "fup_3_arm_1", "baseline_arm_1")
}

it should "use AFUSEpoch as startTime when none is provided" in {
val endTime =
OffsetDateTime.of(2022, 12, 1, 0, 0, 0, 0, ZoneOffset.ofHours(-5))
val afusArms = AfusExtractionPipeline.extractionArmsGenerator(None, Some(endTime))

afusArms shouldBe List("fup_1_arm_1", "fup_2_arm_1", "baseline_arm_1")
// todo: update when extraction arm generator logic is updated
afusArms shouldBe List("fup_1_arm_1", "baseline_arm_1")
//afusArms shouldBe List("fup_1_arm_1", "fup_2_arm_1", "baseline_arm_1")
}

it should "fail when startTime is after endTime" in {
Expand Down
Loading