Page History
...
The following SQL Script can be exected at the end of an ETL (extraction, transformation, loading) pathway to calculate all value for the C_TOTALNUM column automatically. The script was developed on an Oracle 11g platform but should be easily portable to other relational databases.
Info | ||
---|---|---|
| ||
Please note that the script will calculate the patient counts only over facts stored in the OBSERVATION_FACT table. If you installation contains fact data in other tables (e.g. PATIENT_DIMENSION) the script will need to be adapted to your individual configuration. |
Code Block | ||||
---|---|---|---|---|
| ||||
-- SQL Script to calculate the column i2b2.c_totalnum -- 1. Backup the i2b2 table CREATE TABLE t_i2b2 AS (SELECT * FROM i2b2); -- 2a. Calculate count of distinct patients for all branches and leaves of concept hierarchy (regular concepts CREATE TABLE t_totalnum AS SELECT i2b.c_tablename, i2b.c_fullname, COUNT(DISTINCT obs.patient_num) AS c_totalnum FROM observation_fact obs JOIN concept_dimension con ON obs.concept_cd = con.concept_cd JOIN i2b2 i2b ON con.concept_path LIKE i2b.c_fullname || '%' GROUP BY i2b.c_tablename,i2b.c_fullname; -- 2b. (Optionally) Calculate count of distinct patients for all branches and leaves of concept hierarchy (modifier concepts INSERT INTO t_totalnum (c_tablename, c_fullname, c_totalnum) SELECT i2b.c_tablename, i2b.c_fullname, COUNT(DISTINCT obs.patient_num) AS c_totalnum FROM observation_fact obs JOIN modifier_dimension con ON obs.modifier_cd = con.modifier_cd JOIN i2b2 i2b ON con.modifier_path LIKE i2b.c_fullname || '%' GROUP BY i2b.c_tablename,i2b.c_fullname; -- 3. Truncate i2b2 table TRUNCATE TABLE i2b2; -- 4. Repopulate the i2b2 table from its backup and the generated counts INSERT INTO i2b2 (c_hlevel, c_fullname, c_name, c_synonym_cd, c_visualattributes, c_totalnum, c_basecode, c_metadataxml, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype, c_operator, c_dimcode, c_comment, c_tooltip, m_applied_path, update_date, download_date, import_date, sourcesystem_cd, valuetype_cd, m_exclusion_cd, c_path, c_symbol) SELECT i2b.c_hlevel, i2b.c_fullname, i2b.c_name, i2b.c_synonym_cd, i2b.c_visualattributes, NVL(tnm.c_totalnum, 0), i2b.c_basecode, i2b.c_metadataxml, i2b.c_facttablecolumn, i2b.c_tablename, i2b.c_columnname, i2b.c_columndatatype, i2b.c_operator, i2b.c_dimcode, i2b.c_comment, i2b.c_tooltip, i2b.m_applied_path, i2b.update_date, i2b.download_date, i2b.import_date, i2b.sourcesystem_cd, i2b.valuetype_cd, i2b.m_exclusion_cd, i2b.c_path, i2b.c_symbol FROM t_i2b2 i2b LEFT JOIN t_totalnum tnm ON i2b.c_tablename = tnm.c_tablename AND i2b.c_fullname = tnm.c_fullname; -- 5. Drop temporary tables DROP TABLE t_totalnum; DROP TABLE t_i2b2; -- 6. Commit changes COMMIT; |
...
Code Block | ||||
---|---|---|---|---|
| ||||
DROP TABLE "OBSERVATION_FACT" cascade constraints;
CREATE TABLE "OBSERVATION_FACT"
( "ENCOUNTER_NUM" NUMBER(38,0) NOT NULL ENABLE,
"PATIENT_NUM" NUMBER(38,0) NOT NULL ENABLE,
"CONCEPT_CD" VARCHAR2(50 BYTE) NOT NULL ENABLE,
"PROVIDER_ID" VARCHAR2(50 BYTE) NOT NULL ENABLE,
"START_DATE" DATE NOT NULL ENABLE,
"MODIFIER_CD" VARCHAR2(100 BYTE) NOT NULL ENABLE,
"INSTANCE_NUM" NUMBER(18,0) NOT NULL ENABLE,
"VALTYPE_CD" VARCHAR2(50 BYTE),
"TVAL_CHAR" VARCHAR2(255 CHAR),
"NVAL_NUM" NUMBER(18,5),
"VALUEFLAG_CD" VARCHAR2(50 BYTE),
"QUANTITY_NUM" NUMBER(18,5),
"UNITS_CD" VARCHAR2(50 BYTE),
"END_DATE" DATE,
"LOCATION_CD" VARCHAR2(50 BYTE),
"OBSERVATION_BLOB" CLOB,
"CONFIDENCE_NUM" NUMBER(18,5),
"UPDATE_DATE" DATE,
"DOWNLOAD_DATE" DATE,
"IMPORT_DATE" DATE,
"SOURCESYSTEM_CD" VARCHAR2(50 BYTE),
"UPLOAD_ID" NUMBER(38,0),
CONSTRAINT "OBSERVATION_FACT_PK" PRIMARY KEY ("ENCOUNTER_NUM", "CONCEPT_CD", "PROVIDER_ID", "START_DATE", "MODIFIER_CD", "INSTANCE_NUM") USING INDEX LOCAL ENABLE
)
LOB ("OBSERVATION_BLOB") STORE AS BASICFILE (ENABLE STORAGE IN ROW CHUNK 8192 RETENTION)
PARTITION BY RANGE ("CONCEPT_CD")
(
PARTITION BIO VALUES LESS THAN ('DEM'), -- "BIO" partition for biomaterials
PARTITION DEM VALUES LESS THAN ('ENC'), -- "DEM" partition for demographics
PARTITION ENC VALUES LESS THAN ('ICD'), -- "ENC" partition for encounter data
PARTITION ICD VALUES LESS THAN ('LAB'), -- "ICD" partition for diagnoses
PARTITION LAB VALUES LESS THAN ('OPS'), -- "LAB" partition for lab findings
PARTITION OPS VALUES LESS THAN ('ORG'), -- "OPS" partition for procedures
PARTITION ORG VALUES LESS THAN ('PAT'), -- "ORG" partition for organizational data
PARTITION PAT VALUES LESS THAN ('RAD'), -- "PAT" partition for pathology findings
PARTITION RAD VALUES LESS THAN ('THE'), -- "RAD" partition for radiology findings
PARTITION THE VALUES LESS THAN ('ZZZ') -- "THE" partition for therapy data
);
CREATE INDEX of_ctx_blob ON observation_fact (observation_blob) indextype is ctxsys.context parameters ('sync (on commit)');
CREATE INDEX fact_nolob ON observation_fact (patient_num, start_date, concept_cd, encounter_num, instance_num, nval_num, tval_char, valtype_cd, modifier_cd, valueflag_cd, provider_id, quantity_num, units_cd, end_date, location_cd, confidence_num, update_date, download_date, import_date, sourcesystem_cd, upload_id) LOCAL;
CREATE INDEX fact_patcon_date_prvd_idx ON observation_fact (patient_num, concept_cd, start_date, end_date, encounter_num, instance_num, provider_id, nval_num, valtype_cd) LOCAL;
CREATE INDEX idrt_fact_cnpt_pat_enct_idx ON observation_fact (concept_cd, instance_num, patient_num, encounter_num) LOCAL;
CREATE INDEX idrt_fact_mdf_pat_enct_idx ON observation_fact (modifier_cd, instance_num, patient_num, encounter_num) LOCAL;
|
...