From 74a0c32e5bba6fc860d79313f053e932032e79e1 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 00:03:14 -0500 Subject: [PATCH 01/16] Added some potential test data for NodeNorm. --- tests/data/compendia/Gene.txt | 53 ++++++++++++++++++++++++ tests/data/compendia/Protein.txt | 58 +++++++++++++++++++++++++++ tests/data/conflation/GeneProtein.txt | 3 ++ 3 files changed, 114 insertions(+) create mode 100644 tests/data/compendia/Gene.txt create mode 100644 tests/data/compendia/Protein.txt create mode 100644 tests/data/conflation/GeneProtein.txt diff --git a/tests/data/compendia/Gene.txt b/tests/data/compendia/Gene.txt new file mode 100644 index 0000000..7ce162f --- /dev/null +++ b/tests/data/compendia/Gene.txt @@ -0,0 +1,53 @@ +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:65329674", "l": "trnS-UGA", "d": ["tRNA-Ser"], "t": ["NCBITaxon:934069"]}], "preferred_name": "trnS-UGA", "taxa": ["NCBITaxon:934069"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:106478148", "l": "LOC106478148", "d": ["netrin receptor UNC5C-like"], "t": ["NCBITaxon:6850"]}, {"i": "ENSEMBL:LOC106478148", "l": "", "d": [], "t": []}], "preferred_name": "LOC106478148", "taxa": ["NCBITaxon:6850"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:119986099", "l": "LOC119986099", "d": ["B-box zinc finger protein 22"], "t": ["NCBITaxon:458696"]}], "preferred_name": "LOC119986099", "taxa": ["NCBITaxon:458696"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:134820705", "l": "LOC134820705", "d": ["uncharacterized LOC134820705"], "t": ["NCBITaxon:2820187"]}], "preferred_name": "LOC134820705", "taxa": ["NCBITaxon:2820187"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:140317633", "l": "exosc3", "d": ["exosome component 3"], "t": ["NCBITaxon:355514"]}], "preferred_name": "exosc3", "taxa": ["NCBITaxon:355514"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:51369503", "l": "ESD82_RS02965", "d": ["tetrathionate reductase family octaheme c-type cytochrome"], "t": ["NCBITaxon:82367"]}], "preferred_name": "ESD82_RS02965", "taxa": ["NCBITaxon:82367"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:116051546", "l": "hyal2a", "d": ["hyaluronidase 2a"], "t": ["NCBITaxon:283035"]}, {"i": "ENSEMBL:ENSSLUG00000020051", "l": "", "d": [], "t": []}], "preferred_name": "hyal2a", "taxa": ["NCBITaxon:283035"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:137637366", "l": "LOC137637366", "d": ["uncharacterized LOC137637366"], "t": ["NCBITaxon:392227"]}], "preferred_name": "LOC137637366", "taxa": ["NCBITaxon:392227"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:115155662", "l": "LOC115155662", "d": ["neurogenic differentiation factor 1"], "t": ["NCBITaxon:8032"]}], "preferred_name": "LOC115155662", "taxa": ["NCBITaxon:8032"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:138970819", "l": "LOC138970819", "d": ["transcriptional activator Myb-like"], "t": ["NCBITaxon:31220"]}, {"i": "ENSEMBL:ENSQEFG00000027629", "l": "", "d": [], "t": []}], "preferred_name": "LOC138970819", "taxa": ["NCBITaxon:31220"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:128783951", "l": "GATAD1", "d": ["GATA zinc finger domain containing 1"], "t": ["NCBITaxon:81927"]}], "preferred_name": "GATAD1", "taxa": ["NCBITaxon:81927"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:126767363", "l": "LOC126767363", "d": ["uncharacterized LOC126767363"], "t": ["NCBITaxon:98809"]}, {"i": "ENSEMBL:LOC126767363", "l": "", "d": [], "t": []}], "preferred_name": "LOC126767363", "taxa": ["NCBITaxon:98809"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:110089670", "l": "LOC110089670", "d": ["dual specificity tyrosine-phosphorylation-regulated kinase 1B"], "t": ["NCBITaxon:103695"]}, {"i": "ENSEMBL:ENSPVIG00000007256", "l": "", "d": [], "t": []}], "preferred_name": "LOC110089670", "taxa": ["NCBITaxon:103695"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "ENSEMBL:ENSXCOG00000017564", "l": "", "d": [], "t": []}], "preferred_name": "", "taxa": []} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:34687548", "l": "TGL3", "d": ["bifunctional triglyceride lipase/lysophosphatidylethanolamine acyltransferase"], "t": ["NCBITaxon:1245769"]}], "preferred_name": "TGL3", "taxa": ["NCBITaxon:1245769"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:140647695", "l": "TAF4B", "d": ["TATA-box binding protein associated factor 4b"], "t": ["NCBITaxon:52775"]}, {"i": "ENSEMBL:ENSCBOG00010016412", "l": "", "d": [], "t": []}], "preferred_name": "TAF4B", "taxa": ["NCBITaxon:52775"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:86199199", "l": "OXN82_RS05295", "d": ["Gar1/Naf1 family protein"], "t": ["NCBITaxon:2998972"]}], "preferred_name": "OXN82_RS05295", "taxa": ["NCBITaxon:2998972"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:122660255", "l": "LOC122660255", "d": ["sodium/hydrogen exchanger 2-like"], "t": ["NCBITaxon:54955"]}], "preferred_name": "LOC122660255", "taxa": ["NCBITaxon:54955"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:5216388", "l": "MSM_RS00910", "d": ["rubredoxin"], "t": ["NCBITaxon:420247"]}], "preferred_name": "MSM_RS00910", "taxa": ["NCBITaxon:420247"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:101717904", "l": "LOC101717904", "d": ["importin-7 pseudogene"], "t": ["NCBITaxon:10181"]}, {"i": "RGD:18905178", "l": "", "d": [], "t": []}], "preferred_name": "LOC101717904", "taxa": ["NCBITaxon:10181"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:131448389", "l": "LOC131448389", "d": ["SPEG neighbor protein-like"], "t": ["NCBITaxon:90069"]}], "preferred_name": "LOC131448389", "taxa": ["NCBITaxon:90069"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:103137158", "l": "LOC103137158", "d": ["AP-1 complex subunit sigma-2-like"], "t": ["NCBITaxon:48698"]}], "preferred_name": "LOC103137158", "taxa": ["NCBITaxon:48698"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:115096577", "l": "VMA12", "d": ["vacuolar ATPase assembly factor VMA12"], "t": ["NCBITaxon:194408"]}], "preferred_name": "VMA12", "taxa": ["NCBITaxon:194408"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:92145086", "l": "V2V93DRAFT_376255", "d": ["flavoprotein"], "t": ["NCBITaxon:1337062"]}], "preferred_name": "V2V93DRAFT_376255", "taxa": ["NCBITaxon:1337062"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:143943329", "l": "ARG2", "d": ["arginase 2"], "t": ["NCBITaxon:8404"]}], "preferred_name": "ARG2", "taxa": ["NCBITaxon:8404"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:137197181", "l": "dis3", "d": ["DIS3 exosome endoribonuclease and 3'-5' exoribonuclease"], "t": ["NCBITaxon:8237"]}, {"i": "ENSEMBL:ENSTTHG00005023634", "l": "", "d": [], "t": []}], "preferred_name": "dis3", "taxa": ["NCBITaxon:8237"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:117513657", "l": "LOC117513657", "d": ["uncharacterized LOC117513657"], "t": ["NCBITaxon:390379"]}], "preferred_name": "LOC117513657", "taxa": ["NCBITaxon:390379"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:712410", "l": "SLC7A1", "d": ["solute carrier family 7 member 1"], "t": ["NCBITaxon:9544"]}, {"i": "ENSEMBL:ENSMMUG00000008661", "l": "", "d": [], "t": []}], "preferred_name": "SLC7A1", "taxa": ["NCBITaxon:9544"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:140490921", "l": "trnav-uac", "d": ["transfer RNA valine (anticodon UAC)"], "t": ["NCBITaxon:137246"]}], "preferred_name": "trnav-uac", "taxa": ["NCBITaxon:137246"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:127573992", "l": "trnav-uac", "d": ["transfer RNA valine (anticodon UAC)"], "t": ["NCBITaxon:685728"]}], "preferred_name": "trnav-uac", "taxa": ["NCBITaxon:685728"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:41852696", "l": "F7P43_mgt07", "d": ["tRNA-Trp"], "t": ["NCBITaxon:288557"]}], "preferred_name": "F7P43_mgt07", "taxa": ["NCBITaxon:288557"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "ENSEMBL:ENSSSCG00030047449", "l": "", "d": [], "t": []}], "preferred_name": "", "taxa": []} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:110984389", "l": "LOC110984389", "d": ["single-stranded DNA-binding protein 3-like"], "t": ["NCBITaxon:133434"]}, {"i": "ENSEMBL:LOC110984389", "l": "", "d": [], "t": []}], "preferred_name": "LOC110984389", "taxa": ["NCBITaxon:133434"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:125204129", "l": "LOC125204129", "d": ["probable choline kinase 2"], "t": ["NCBITaxon:49212"]}], "preferred_name": "LOC125204129", "taxa": ["NCBITaxon:49212"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:123704163", "l": "Hacd1", "d": ["3-hydroxyacyl-CoA dehydratase 1"], "t": ["NCBITaxon:72248"]}, {"i": "ENSEMBL:ENSCEUG00000000663", "l": "", "d": [], "t": []}], "preferred_name": "Hacd1", "taxa": ["NCBITaxon:72248"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:100431421", "l": "LOC100431421", "d": ["olfactory receptor 51I2-like"], "t": ["NCBITaxon:9601"]}], "preferred_name": "LOC100431421", "taxa": ["NCBITaxon:9601"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:110683671", "l": "LOC110683671", "d": ["ankyrin-3-like"], "t": ["NCBITaxon:63459"]}], "preferred_name": "LOC110683671", "taxa": ["NCBITaxon:63459"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:66741728", "l": "Psal070_RS12790", "d": ["MFS transporter"], "t": ["NCBITaxon:1238"]}], "preferred_name": "Psal070_RS12790", "taxa": ["NCBITaxon:1238"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:19900930", "l": "W97_03619", "d": ["uncharacterized protein"], "t": ["NCBITaxon:1168221"]}], "preferred_name": "W97_03619", "taxa": ["NCBITaxon:1168221"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:140396381", "l": "atp2b1a", "d": ["ATPase plasma membrane Ca2+ transporting 1a"], "t": ["NCBITaxon:75743"]}], "preferred_name": "atp2b1a", "taxa": ["NCBITaxon:75743"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:142915933", "l": "LOC142915933", "d": ["uncharacterized LOC142915933"], "t": ["NCBITaxon:7757"]}], "preferred_name": "LOC142915933", "taxa": ["NCBITaxon:7757"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:132979687", "l": "LOC132979687", "d": ["5S ribosomal RNA"], "t": ["NCBITaxon:508554"]}, {"i": "ENSEMBL:ENSHRMG00000034756", "l": "", "d": [], "t": []}], "preferred_name": "LOC132979687", "taxa": ["NCBITaxon:508554"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:124160567", "l": "LOC124160567", "d": ["kielin/chordin-like protein"], "t": ["NCBITaxon:197161"]}], "preferred_name": "LOC124160567", "taxa": ["NCBITaxon:197161"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:142304191", "l": "PRR36", "d": ["proline rich 36"], "t": ["NCBITaxon:238106"]}], "preferred_name": "PRR36", "taxa": ["NCBITaxon:238106"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:103594407", "l": "LOC103594407", "d": ["40S ribosomal protein S2 pseudogene"], "t": ["NCBITaxon:482537"]}], "preferred_name": "LOC103594407", "taxa": ["NCBITaxon:482537"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:92840899", "l": "rpsG", "d": ["30S ribosomal protein S7"], "t": ["NCBITaxon:649747"]}], "preferred_name": "rpsG", "taxa": ["NCBITaxon:649747"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:121113921", "l": "LOC121113921", "d": ["small subunit ribosomal RNA"], "t": ["NCBITaxon:72036"]}, {"i": "ENSEMBL:LOC121113921", "l": "", "d": [], "t": []}], "preferred_name": "LOC121113921", "taxa": ["NCBITaxon:72036"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:141105011", "l": "TIMM44", "d": ["translocase of inner mitochondrial membrane 44"], "t": ["NCBITaxon:8400"]}], "preferred_name": "TIMM44", "taxa": ["NCBITaxon:8400"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:51369827", "l": "proS", "d": ["proline--tRNA ligase"], "t": ["NCBITaxon:82367"]}], "preferred_name": "proS", "taxa": ["NCBITaxon:82367"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:131432797", "l": "LOC131432797", "d": ["eukaryotic translation initiation factor 2-alpha kinase-like"], "t": ["NCBITaxon:325434"]}, {"i": "ENSEMBL:LOC131432797", "l": "", "d": [], "t": []}], "preferred_name": "LOC131432797", "taxa": ["NCBITaxon:325434"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:100123973", "l": "lovit", "d": ["loss of visual transmission"], "t": ["NCBITaxon:7425"]}], "preferred_name": "lovit", "taxa": ["NCBITaxon:7425"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:86865688", "l": "nrdR", "d": ["transcriptional regulator NrdR"], "t": ["NCBITaxon:1829"]}], "preferred_name": "nrdR", "taxa": ["NCBITaxon:1829"]} +{"type": "biolink:Gene", "ic": null, "identifiers": [{"i": "NCBIGene:408922", "l": "LOC408922", "d": ["discoidin domain-containing receptor 2"], "t": ["NCBITaxon:7460"]}], "preferred_name": "LOC408922", "taxa": ["NCBITaxon:7460"]} diff --git a/tests/data/compendia/Protein.txt b/tests/data/compendia/Protein.txt new file mode 100644 index 0000000..3134284 --- /dev/null +++ b/tests/data/compendia/Protein.txt @@ -0,0 +1,58 @@ +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABC9X9Q2", "l": "A0ABC9X9Q2_GRUJA Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:30415"]}], "preferred_name": "A0ABC9X9Q2_GRUJA Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:30415"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A1H2JT83", "l": "A0A1H2JT83_9ACTN Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:158898"]}], "preferred_name": "A0A1H2JT83_9ACTN Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:158898"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A7J6D966", "l": "A0A7J6D966_9TELE Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:369639"]}], "preferred_name": "A0A7J6D966_9TELE Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:369639"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A7R9PL73", "l": "A0A7R9PL73_TIMGE Ubiquitin-associated protein 1 (trembl)", "d": [], "t": ["NCBITaxon:629358"]}], "preferred_name": "A0A7R9PL73_TIMGE Ubiquitin-associated protein 1 (trembl)", "taxa": ["NCBITaxon:629358"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABW4QWC9", "l": "A0ABW4QWC9_9BACT Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:1844114"]}], "preferred_name": "A0ABW4QWC9_9BACT Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:1844114"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A1Y3U028", "l": "A0A1Y3U028_9ACTN DUF4097 domain-containing protein (trembl)", "d": [], "t": ["NCBITaxon:1472761"]}], "preferred_name": "A0A1Y3U028_9ACTN DUF4097 domain-containing protein (trembl)", "taxa": ["NCBITaxon:1472761"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A8S0U9F9", "l": "A0A8S0U9F9_OLEEU Bem46 isoform X2 (trembl)", "d": [], "t": ["NCBITaxon:158383"]}], "preferred_name": "A0A8S0U9F9_OLEEU Bem46 isoform X2 (trembl)", "taxa": ["NCBITaxon:158383"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:B1F859", "l": "B1F859_9BURK Sensor protein (trembl)", "d": [], "t": ["NCBITaxon:396596"]}], "preferred_name": "B1F859_9BURK Sensor protein (trembl)", "taxa": ["NCBITaxon:396596"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A161Y8T1", "l": "A0A161Y8T1_9GAMM RND efflux pump membrane fusion protein barrel-sandwich domain-containing protein (trembl)", "d": [], "t": ["NCBITaxon:1365251"]}], "preferred_name": "A0A161Y8T1_9GAMM RND efflux pump membrane fusion protein barrel-sandwich domain-containing protein (trembl)", "taxa": ["NCBITaxon:1365251"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A8S3VBG1", "l": "A0A8S3VBG1_MYTED Endonuclease/exonuclease/phosphatase domain-containing protein (trembl)", "d": [], "t": ["NCBITaxon:6550"]}], "preferred_name": "A0A8S3VBG1_MYTED Endonuclease/exonuclease/phosphatase domain-containing protein (trembl)", "taxa": ["NCBITaxon:6550"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:Q3JV53", "l": "Q3JV53_BURP1 200 kDa antigen p200 (trembl)", "d": [], "t": ["NCBITaxon:320372"]}], "preferred_name": "Q3JV53_BURP1 200 kDa antigen p200 (trembl)", "taxa": ["NCBITaxon:320372"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A0Y0TUJ4", "l": "A0A0Y0TUJ4_9FABA Photosystem I P700 chlorophyll a apoprotein A2 (trembl)", "d": [], "t": ["NCBITaxon:1789066"]}], "preferred_name": "A0A0Y0TUJ4_9FABA Photosystem I P700 chlorophyll a apoprotein A2 (trembl)", "taxa": ["NCBITaxon:1789066"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A859CV88", "l": "A0A859CV88_9GAMM 2,3-dihydroxybiphenyl 1,2-dioxygenase (trembl)", "d": [], "t": ["NCBITaxon:178399"]}], "preferred_name": "A0A859CV88_9GAMM 2,3-dihydroxybiphenyl 1,2-dioxygenase (trembl)", "taxa": ["NCBITaxon:178399"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABY9DAZ7", "l": "A0ABY9DAZ7_VITVI Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:29760"]}], "preferred_name": "A0ABY9DAZ7_VITVI Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:29760"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:Q7MVU0", "l": "Q7MVU0_PORGI DNA polymerase III delta N-terminal domain-containing protein (trembl)", "d": [], "t": ["NCBITaxon:242619"]}], "preferred_name": "Q7MVU0_PORGI DNA polymerase III delta N-terminal domain-containing protein (trembl)", "taxa": ["NCBITaxon:242619"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:K0NJ60", "l": "K0NJ60_DESTT Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:651182"]}], "preferred_name": "K0NJ60_DESTT Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:651182"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A9W7FQE5", "l": "A0A9W7FQE5_9STRA Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:1714387"]}], "preferred_name": "A0A9W7FQE5_9STRA Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:1714387"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABN6XMK8", "l": "A0ABN6XMK8_9MICO Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:1162966"]}], "preferred_name": "A0ABN6XMK8_9MICO Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:1162966"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABV5RT80", "l": "A0ABV5RT80_9ACTN Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:37484"]}], "preferred_name": "A0ABV5RT80_9ACTN Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:37484"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A8H8R2W3", "l": "A0A8H8R2W3_9HELO endo-1,3(4)-beta-glucanase (trembl)", "d": [], "t": ["NCBITaxon:1316788"]}], "preferred_name": "A0A8H8R2W3_9HELO endo-1,3(4)-beta-glucanase (trembl)", "taxa": ["NCBITaxon:1316788"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A2S3I4D3", "l": "A0A2S3I4D3_9POAL PGG domain-containing protein (trembl)", "d": [], "t": ["NCBITaxon:206008"]}], "preferred_name": "A0A2S3I4D3_9POAL PGG domain-containing protein (trembl)", "taxa": ["NCBITaxon:206008"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A136PUP3", "l": "A0A136PUP3_9ACTN RNAse (trembl)", "d": [], "t": ["NCBITaxon:47874"]}], "preferred_name": "A0A136PUP3_9ACTN RNAse (trembl)", "taxa": ["NCBITaxon:47874"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABD4TZP9", "l": "A0ABD4TZP9_9ACTO Peptidase (trembl)", "d": [], "t": ["NCBITaxon:2052"]}], "preferred_name": "A0ABD4TZP9_9ACTO Peptidase (trembl)", "taxa": ["NCBITaxon:2052"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A3Q8UDD2", "l": "A0A3Q8UDD2_IBDV Structural polyprotein (Fragment) (trembl)", "d": [], "t": ["NCBITaxon:10995"]}], "preferred_name": "A0A3Q8UDD2_IBDV Structural polyprotein (Fragment) (trembl)", "taxa": ["NCBITaxon:10995"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A6J7X8H9", "l": "A0A6J7X8H9_9CAUD Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:2100421"]}], "preferred_name": "A0A6J7X8H9_9CAUD Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:2100421"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABN9GUJ2", "l": "A0ABN9GUJ2_9NEOB Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:386267"]}], "preferred_name": "A0ABN9GUJ2_9NEOB Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:386267"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:D2QG65", "l": "D2QG65_SPILD Short-chain dehydrogenase/reductase SDR (trembl)", "d": [], "t": ["NCBITaxon:504472"]}], "preferred_name": "D2QG65_SPILD Short-chain dehydrogenase/reductase SDR (trembl)", "taxa": ["NCBITaxon:504472"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A7K9LH51", "l": "A0A7K9LH51_9PASS DnaJ homolog subfamily A member 1 (Fragment) (trembl)", "d": [], "t": ["NCBITaxon:237438"]}], "preferred_name": "A0A7K9LH51_9PASS DnaJ homolog subfamily A member 1 (Fragment) (trembl)", "taxa": ["NCBITaxon:237438"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A3M5MPF8", "l": "A0A3M5MPF8_PSESX Glycine betaine-binding protein (trembl)", "d": [], "t": ["NCBITaxon:103985"]}], "preferred_name": "A0A3M5MPF8_PSESX Glycine betaine-binding protein (trembl)", "taxa": ["NCBITaxon:103985"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:K1U1J8", "l": "K1U1J8_9ZZZZ Site-specific recombinase (Fragment) (trembl)", "d": [], "t": ["NCBITaxon:408170"]}], "preferred_name": "K1U1J8_9ZZZZ Site-specific recombinase (Fragment) (trembl)", "taxa": ["NCBITaxon:408170"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A4Z0WD40", "l": "A0A4Z0WD40_9GAMM Phosphoenolpyruvate synthase (trembl)", "d": [], "t": ["NCBITaxon:2759953"]}], "preferred_name": "A0A4Z0WD40_9GAMM Phosphoenolpyruvate synthase (trembl)", "taxa": ["NCBITaxon:2759953"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A3G1WXD5", "l": "A0A3G1WXD5_9HYME Cytochrome c oxidase subunit 1 (Fragment) (trembl)", "d": [], "t": ["NCBITaxon:2423589"]}], "preferred_name": "A0A3G1WXD5_9HYME Cytochrome c oxidase subunit 1 (Fragment) (trembl)", "taxa": ["NCBITaxon:2423589"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A556MGT8", "l": "A0A556MGT8_9FLAO CBM2 domain-containing protein (trembl)", "d": [], "t": ["NCBITaxon:2597671"]}], "preferred_name": "A0A556MGT8_9FLAO CBM2 domain-containing protein (trembl)", "taxa": ["NCBITaxon:2597671"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A517L432", "l": "A0A517L432_9PEZI Origin recognition complex subunit 2 (trembl)", "d": [], "t": ["NCBITaxon:50376"]}], "preferred_name": "A0A517L432_9PEZI Origin recognition complex subunit 2 (trembl)", "taxa": ["NCBITaxon:50376"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A1S3UDC4", "l": "A0A1S3UDC4_VIGRR Protein OPI10 homolog (trembl)", "d": [], "t": ["NCBITaxon:3916"]}], "preferred_name": "A0A1S3UDC4_VIGRR Protein OPI10 homolog (trembl)", "taxa": ["NCBITaxon:3916"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A2A2D2B7", "l": "A0A2A2D2B7_9ACTN Transporter (trembl)", "d": [], "t": ["NCBITaxon:1940"]}], "preferred_name": "A0A2A2D2B7_9ACTN Transporter (trembl)", "taxa": ["NCBITaxon:1940"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABM8CTL7", "l": "A0ABM8CTL7_9NOCA Cysteine desulfurase (trembl)", "d": [], "t": ["NCBITaxon:2984338"]}], "preferred_name": "A0ABM8CTL7_9NOCA Cysteine desulfurase (trembl)", "taxa": ["NCBITaxon:2984338"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:E4ZJZ5", "l": "E4ZJZ5_LEPMJ Similar to MFS transporter (trembl)", "d": [], "t": ["NCBITaxon:985895"]}], "preferred_name": "E4ZJZ5_LEPMJ Similar to MFS transporter (trembl)", "taxa": ["NCBITaxon:985895"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A5V5F8R7", "l": "A0A5V5F8R7_SALER Uncharacterized protein (trembl)", "d": [], "t": ["NCBITaxon:28901"]}], "preferred_name": "A0A5V5F8R7_SALER Uncharacterized protein (trembl)", "taxa": ["NCBITaxon:28901"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A1G5R0P3", "l": "A0A1G5R0P3_PHOLU Pyocin immunity protein (trembl)", "d": [], "t": ["NCBITaxon:29488"]}], "preferred_name": "A0A1G5R0P3_PHOLU Pyocin immunity protein (trembl)", "taxa": ["NCBITaxon:29488"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A3B0UFL8", "l": "A0A3B0UFL8_9ZZZZ MalT-like TPR region domain-containing protein (Fragment) (trembl)", "d": [], "t": ["NCBITaxon:652676"]}], "preferred_name": "A0A3B0UFL8_9ZZZZ MalT-like TPR region domain-containing protein (Fragment) (trembl)", "taxa": ["NCBITaxon:652676"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A175K121", "l": "A0A175K121_ENTHI Brix domain-containing protein 1 putative (trembl)", "d": [], "t": ["NCBITaxon:5759"]}], "preferred_name": "A0A175K121_ENTHI Brix domain-containing protein 1 putative (trembl)", "taxa": ["NCBITaxon:5759"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0AAD8UAG3", "l": "A0AAD8UAG3_GLOAC Heterokaryon incompatibility domain-containing protein (Fragment) (trembl)", "d": [], "t": ["NCBITaxon:27357"]}], "preferred_name": "A0AAD8UAG3_GLOAC Heterokaryon incompatibility domain-containing protein (Fragment) (trembl)", "taxa": ["NCBITaxon:27357"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A5N8XJ09", "l": "A0A5N8XJ09_9ACTN LysR family transcriptional regulator (trembl)", "d": [], "t": ["NCBITaxon:565072"]}], "preferred_name": "A0A5N8XJ09_9ACTN LysR family transcriptional regulator (trembl)", "taxa": ["NCBITaxon:565072"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0AA41SHG1", "l": "A0AA41SHG1_PAPNU non-specific serine/threonine protein kinase (trembl)", "d": [], "t": ["NCBITaxon:74823"]}], "preferred_name": "A0AA41SHG1_PAPNU non-specific serine/threonine protein kinase (trembl)", "taxa": ["NCBITaxon:74823"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A2K6EBJ3", "l": "A0A2K6EBJ3_MACNE Short coiled-coil protein (trembl)", "d": [], "t": ["NCBITaxon:9545"]}, {"i": "ENSEMBL:ENSMNEP00000045529", "l": "", "d": [], "t": []}, {"i": "ENSEMBL:ENSMNEP00000045529.1", "l": "", "d": [], "t": []}], "preferred_name": "A0A2K6EBJ3_MACNE Short coiled-coil protein (trembl)", "taxa": ["NCBITaxon:9545"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A285VFF0", "l": "A0A285VFF0_9ACTN CoA-transferase family III (trembl)", "d": [], "t": ["NCBITaxon:38502"]}], "preferred_name": "A0A285VFF0_9ACTN CoA-transferase family III (trembl)", "taxa": ["NCBITaxon:38502"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:G8LT89", "l": "G8LT89_ACECE Glycine radical enzyme activase, YjjW family (trembl)", "d": [], "t": ["NCBITaxon:720554"]}], "preferred_name": "G8LT89_ACECE Glycine radical enzyme activase, YjjW family (trembl)", "taxa": ["NCBITaxon:720554"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0ABV0WW59", "l": "A0ABV0WW59_9TELE Dedicator of cytokinesis protein 8 (trembl)", "d": [], "t": ["NCBITaxon:208358"]}], "preferred_name": "A0ABV0WW59_9TELE Dedicator of cytokinesis protein 8 (trembl)", "taxa": ["NCBITaxon:208358"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0AAV2Q518", "l": "A0AAV2Q518_MEGNR Laminin G domain-containing protein (Fragment) (trembl)", "d": [], "t": ["NCBITaxon:48144"]}], "preferred_name": "A0AAV2Q518_MEGNR Laminin G domain-containing protein (Fragment) (trembl)", "taxa": ["NCBITaxon:48144"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A7M7H3Y8", "l": "A0A7M7H3Y8_NASVI Proton-associated sugar transporter A (trembl)", "d": [], "t": ["NCBITaxon:7425"]}], "preferred_name": "A0A7M7H3Y8_NASVI Proton-associated sugar transporter A (trembl)", "taxa": ["NCBITaxon:7425"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A7M7GCA5", "l": "A0A7M7GCA5_NASVI Proton-associated sugar transporter A (trembl)", "d": [], "t": ["NCBITaxon:7425"]}], "preferred_name": "A0A7M7GCA5_NASVI Proton-associated sugar transporter A (trembl)", "taxa": ["NCBITaxon:7425"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A0V9UNI2", "l": "A0A0V9UNI2_9NOCA Transcriptional repressor NrdR (trembl)", "d": [], "t": ["NCBITaxon:1441730"]}], "preferred_name": "A0A0V9UNI2_9NOCA Transcriptional repressor NrdR (trembl)", "taxa": ["NCBITaxon:1441730"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A6I4X5I4", "l": "A0A6I4X5I4_RHORH Transcriptional repressor NrdR (trembl)", "d": [], "t": ["NCBITaxon:1829"]}], "preferred_name": "A0A6I4X5I4_RHORH Transcriptional repressor NrdR (trembl)", "taxa": ["NCBITaxon:1829"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A562E6K3", "l": "A0A562E6K3_RHORH Transcriptional repressor NrdR (trembl)", "d": [], "t": ["NCBITaxon:935266"]}], "preferred_name": "A0A562E6K3_RHORH Transcriptional repressor NrdR (trembl)", "taxa": ["NCBITaxon:935266"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A7M7L9F7", "l": "A0A7M7L9F7_APIME Discoidin domain-containing receptor 2 isoform X2 (trembl)", "d": [], "t": ["NCBITaxon:7460"]}], "preferred_name": "A0A7M7L9F7_APIME Discoidin domain-containing receptor 2 isoform X2 (trembl)", "taxa": ["NCBITaxon:7460"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A7M7MHA0", "l": "A0A7M7MHA0_APIME Discoidin domain-containing receptor 2 isoform X3 (trembl)", "d": [], "t": ["NCBITaxon:7460"]}], "preferred_name": "A0A7M7MHA0_APIME Discoidin domain-containing receptor 2 isoform X3 (trembl)", "taxa": ["NCBITaxon:7460"]} +{"type": "biolink:Protein", "ic": null, "identifiers": [{"i": "UniProtKB:A0A7M7L653", "l": "A0A7M7L653_APIME Discoidin domain-containing receptor 2 isoform X4 (trembl)", "d": [], "t": ["NCBITaxon:7460"]}], "preferred_name": "A0A7M7L653_APIME Discoidin domain-containing receptor 2 isoform X4 (trembl)", "taxa": ["NCBITaxon:7460"]} diff --git a/tests/data/conflation/GeneProtein.txt b/tests/data/conflation/GeneProtein.txt new file mode 100644 index 0000000..a47858a --- /dev/null +++ b/tests/data/conflation/GeneProtein.txt @@ -0,0 +1,3 @@ +["NCBIGene:100123973", "UniProtKB:A0A7M7GCA5", "UniProtKB:A0A7M7H3Y8"] +["NCBIGene:408922", "UniProtKB:A0A7M7L653", "UniProtKB:A0A7M7L9F7", "UniProtKB:A0A7M7MHA0", "UniProtKB:A0A7M7MKP6"] +["NCBIGene:86865688", "UniProtKB:A0A0V9UNI2", "UniProtKB:A0A562E6K3", "UniProtKB:A0A6I4X5I4"] From 1d4307706536bed4a22e37d96647312098028797 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 00:53:51 -0500 Subject: [PATCH 02/16] Add GitHub Actions integration test pipeline. - New .github/workflows/test.yml with parallel unit-tests and integration-tests jobs; integration job uses a Redis service container on port 6379. - New docker-compose-redis.yml for running Redis locally during development. - New tests/test_integration.py with 27 tests across 6 classes (status, get_normalized_nodes, semantic_types, curie_prefixes, setid, query, conflations), all marked pytest.mark.integration; data-loading placeholder present for Phase 2. - Updated tests/conftest.py to add session-scoped integration_client fixture with Redis availability check and TestClient context manager. - Updated tests/test_callback.py to skip test_async_query_callback (requires external callback-app container). - Updated pytest.ini to register integration and callback_integration markers and document the plain-pytest limitation from test_norm/test_setid module-level app.state mutation. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/test.yml | 61 +++++++ docker-compose-redis.yml | 10 ++ pytest.ini | 15 +- tests/conftest.py | 43 +++++ tests/test_callback.py | 2 + tests/test_integration.py | 356 +++++++++++++++++++++++++++++++++++++ 6 files changed, 486 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/test.yml create mode 100644 docker-compose-redis.yml create mode 100644 tests/test_integration.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..91f143c --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,61 @@ +name: Tests + +on: + push: + pull_request: + +jobs: + unit-tests: + name: Unit Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Run unit tests + run: pytest -v -m "not integration" --tb=short + + integration-tests: + name: Integration Tests + runs-on: ubuntu-latest + services: + redis: + image: redis:alpine + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 5s + --health-timeout 3s + --health-retries 10 + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + + - name: Install dependencies + run: pip install -r requirements.txt + + # PLACEHOLDER: create tests/data/config.json and load test data into Redis + # Example (fill in once tests/data/config.json is finalized): + # python -c " + # import asyncio + # from node_normalizer.loader import NodeLoader + # loader = NodeLoader('tests/data/config.json') + # asyncio.run(loader.load(100_000)) + # " + + - name: Run integration tests + run: pytest -v -m "integration" --tb=short diff --git a/docker-compose-redis.yml b/docker-compose-redis.yml new file mode 100644 index 0000000..e5b591b --- /dev/null +++ b/docker-compose-redis.yml @@ -0,0 +1,10 @@ +services: + redis: + image: redis:alpine + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 10 diff --git a/pytest.ini b/pytest.ini index 224b542..d01a25d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,4 +3,17 @@ log_cli=false log_cli_level=DEBUG log_file=tests.log log_file_level=DEBUG -asyncio_mode=auto \ No newline at end of file +asyncio_mode=auto + +# Custom markers +markers = + integration: marks tests that require real Redis (run with: pytest -m integration) + callback_integration: marks tests requiring the callback-app Docker container + +# WARNING: running plain `pytest` without a -m filter may fail or produce +# unpredictable results. test_norm.py and test_setid.py assign directly to +# app.state.* at module-import time (before any fixture runs), which permanently +# mutates the shared FastAPI app singleton. Use explicit marker filters instead: +# +# pytest -m "not integration" — unit tests only (no Redis required) +# pytest -m "integration" — integration tests only (requires Redis + data) \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 7d2bb28..157ab79 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,13 @@ import sys import os import time +import socket import pytest import logging from testcontainers.compose import DockerCompose from testcontainers.core.docker_client import DockerClient +from fastapi.testclient import TestClient from node_normalizer.util import LoggingUtil @@ -49,3 +51,44 @@ def stop(): request.addfinalizer(stop) return compose, nn_url, callback_url + + +@pytest.fixture(scope="session") +def integration_client(): + """ + Session-scoped fixture for integration tests. + + Requires Redis running on localhost:6379. In CI this is provided by the + GitHub Actions services.redis block in .github/workflows/test.yml. + Locally: docker compose -f docker-compose-redis.yml up -d + + The TestClient context manager triggers startup_event(), which establishes + real Redis connections via redis_config.yaml. All integration tests share + this single session; tests must be read-only (no writes to Redis). + + Note: startup_event also downloads the Biolink Model YAML from GitHub via + bmt. This is slow on the first run; bmt caches it in ~/.cache afterward. + """ + # Verify Redis is reachable before starting the app; fail fast with a clear + # message rather than a cryptic connection error from deep inside the app. + redis_host, redis_port = "127.0.0.1", 6379 + try: + with socket.create_connection((redis_host, redis_port), timeout=5): + pass + except OSError as exc: + raise RuntimeError( + f"Integration tests require Redis on {redis_host}:{redis_port}. " + "Start it with: docker compose -f docker-compose-redis.yml up -d" + ) from exc + + # PLACEHOLDER: load test data into Redis before starting the app. + # Fill this in once tests/data/config.json is finalized, e.g.: + # + # import asyncio + # from node_normalizer.loader import NodeLoader + # loader = NodeLoader("tests/data/config.json") + # asyncio.run(loader.load(100_000)) + + from node_normalizer.server import app + with TestClient(app) as client: + yield client diff --git a/tests/test_callback.py b/tests/test_callback.py index e81f246..33db72e 100644 --- a/tests/test_callback.py +++ b/tests/test_callback.py @@ -2,6 +2,7 @@ import json import time +import pytest import reasoner_pydantic import requests import fastapi @@ -14,6 +15,7 @@ premerged_response = Path(__file__).parent / "resources" / "premerged_response.json" +@pytest.mark.skip(reason="Requires external callback-app container. See docker-compose-test.yml.") def test_async_query_callback(session): # host_url = "http://r3:8080/asyncquery" diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..8cea6f0 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,356 @@ +""" +Integration tests for NodeNormalization. + +These tests require a real Redis instance loaded with test data. +Run with: pytest -v -m integration + +Prerequisites: + docker compose -f docker-compose-redis.yml up -d + # load test data (see tests/conftest.py integration_client fixture) + +Test CURIEs are sourced from tests/data/compendia/Gene.txt lines 1-2 and +tests/data/conflation/GeneProtein.txt lines 1-3. + + Gene line 1: NCBIGene:65329674 — trnS-UGA, single-identifier clique + Gene line 2: NCBIGene:106478148 — canonical; ENSEMBL:LOC106478148 is equivalent + GeneProtein: NCBIGene:100123973 + UniProtKB:A0A7M7GCA5, UniProtKB:A0A7M7H3Y8 +""" + +import pytest + +pytestmark = pytest.mark.integration + +# --------------------------------------------------------------------------- +# Canonical gene CURIE with one identifier in its clique +SINGLE_ID_GENE = "NCBIGene:65329674" + +# Canonical gene CURIE with two identifiers in its clique +CANONICAL_GENE = "NCBIGene:106478148" +NON_CANONICAL_GENE = "ENSEMBL:LOC106478148" # equivalent to CANONICAL_GENE + +# Gene present in GeneProtein conflation +CONFLATION_GENE = "NCBIGene:100123973" +CONFLATION_UNIPROT_1 = "UniProtKB:A0A7M7GCA5" +CONFLATION_UNIPROT_2 = "UniProtKB:A0A7M7H3Y8" + +UNKNOWN_CURIE = "UNKNOWN:000000" +ANOTHER_UNKNOWN = "FAKE:999999" + + +# =========================================================================== +class TestStatusEndpoint: + + def test_status_returns_running(self, integration_client): + response = integration_client.get("/status") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "running" + + def test_status_contains_all_database_names(self, integration_client): + response = integration_client.get("/status") + assert response.status_code == 200 + databases = response.json()["databases"] + expected = { + "eq_id_to_id_db", + "id_to_eqids_db", + "id_to_type_db", + "curie_to_bl_type_db", + "info_content_db", + "gene_protein_db", + "chemical_drug_db", + } + assert set(databases.keys()) == expected + + def test_status_core_databases_have_nonzero_counts(self, integration_client): + """Canary: confirms that data loading ran before these tests.""" + response = integration_client.get("/status") + assert response.status_code == 200 + databases = response.json()["databases"] + for db_name in ("eq_id_to_id_db", "id_to_eqids_db", "id_to_type_db"): + count = databases[db_name]["count"] + assert count > 0, f"{db_name} has count=0; was test data loaded into Redis?" + + +# =========================================================================== +class TestGetNormalizedNodes: + + def test_get_known_canonical_gene_curie(self, integration_client): + response = integration_client.get( + "/get_normalized_nodes", params={"curie": [CANONICAL_GENE]} + ) + assert response.status_code == 200 + result = response.json() + assert CANONICAL_GENE in result + node = result[CANONICAL_GENE] + assert node is not None + assert node["id"]["identifier"] == CANONICAL_GENE + assert "biolink:Gene" in node["type"] + + def test_get_non_canonical_resolves_to_canonical(self, integration_client): + """An equivalent (non-canonical) CURIE should resolve to the canonical.""" + response = integration_client.get( + "/get_normalized_nodes", params={"curie": [NON_CANONICAL_GENE]} + ) + assert response.status_code == 200 + result = response.json() + assert NON_CANONICAL_GENE in result + node = result[NON_CANONICAL_GENE] + assert node is not None + assert node["id"]["identifier"] == CANONICAL_GENE + + def test_get_unknown_curie_returns_null(self, integration_client): + """Unknown CURIE should return None (not an error).""" + response = integration_client.get( + "/get_normalized_nodes", params={"curie": [UNKNOWN_CURIE]} + ) + assert response.status_code == 200 + result = response.json() + assert result == {UNKNOWN_CURIE: None} + + def test_get_mix_known_and_unknown(self, integration_client): + response = integration_client.get( + "/get_normalized_nodes", + params={"curie": [UNKNOWN_CURIE, CANONICAL_GENE]}, + ) + assert response.status_code == 200 + result = response.json() + assert len(result) == 2 + assert result[UNKNOWN_CURIE] is None + assert result[CANONICAL_GENE] is not None + assert result[CANONICAL_GENE]["id"]["identifier"] == CANONICAL_GENE + + def test_get_all_unknown_returns_dict_of_nulls(self, integration_client): + """ + Regression for https://github.com/NCATSTranslator/NodeNormalization/issues/113 + Previously returned {} instead of {curie: None, ...}. + """ + response = integration_client.get( + "/get_normalized_nodes", + params={"curie": [UNKNOWN_CURIE, ANOTHER_UNKNOWN]}, + ) + assert response.status_code == 200 + result = response.json() + assert result == {UNKNOWN_CURIE: None, ANOTHER_UNKNOWN: None} + + def test_post_known_canonical_gene_curie(self, integration_client): + response = integration_client.post( + "/get_normalized_nodes", json={"curies": [CANONICAL_GENE]} + ) + assert response.status_code == 200 + result = response.json() + assert CANONICAL_GENE in result + assert result[CANONICAL_GENE]["id"]["identifier"] == CANONICAL_GENE + + def test_post_unknown_curie_returns_null(self, integration_client): + response = integration_client.post( + "/get_normalized_nodes", json={"curies": [UNKNOWN_CURIE]} + ) + assert response.status_code == 200 + result = response.json() + assert result == {UNKNOWN_CURIE: None} + + def test_post_all_unknown_returns_dict_of_nulls(self, integration_client): + response = integration_client.post( + "/get_normalized_nodes", + json={"curies": [UNKNOWN_CURIE, ANOTHER_UNKNOWN]}, + ) + assert response.status_code == 200 + result = response.json() + assert result == {UNKNOWN_CURIE: None, ANOTHER_UNKNOWN: None} + + def test_get_empty_list_returns_422(self, integration_client): + response = integration_client.get( + "/get_normalized_nodes", params={"curie": []} + ) + assert response.status_code == 422 + + def test_post_empty_list_returns_422(self, integration_client): + response = integration_client.post( + "/get_normalized_nodes", json={"curies": []} + ) + assert response.status_code == 422 + + def test_conflate_true_includes_uniprot_identifiers(self, integration_client): + """With conflate=True, a gene in GeneProtein.txt should have UniProtKB equivalents.""" + response = integration_client.get( + "/get_normalized_nodes", + params={"curie": [CONFLATION_GENE], "conflate": True}, + ) + assert response.status_code == 200 + result = response.json() + node = result.get(CONFLATION_GENE) + assert node is not None + equiv_ids = [eq["identifier"] for eq in node.get("equivalent_identifiers", [])] + assert any(eid.startswith("UniProtKB:") for eid in equiv_ids), ( + f"Expected UniProtKB identifiers in equivalents with conflate=True, got: {equiv_ids}" + ) + + def test_conflate_false_excludes_uniprot_identifiers(self, integration_client): + """With conflate=False, UniProtKB identifiers from conflation should not appear.""" + response = integration_client.get( + "/get_normalized_nodes", + params={"curie": [CONFLATION_GENE], "conflate": False}, + ) + assert response.status_code == 200 + result = response.json() + node = result.get(CONFLATION_GENE) + assert node is not None + equiv_ids = [eq["identifier"] for eq in node.get("equivalent_identifiers", [])] + assert not any(eid.startswith("UniProtKB:") for eid in equiv_ids), ( + f"Expected no UniProtKB identifiers with conflate=False, got: {equiv_ids}" + ) + + +# =========================================================================== +class TestGetSemanticTypes: + + def test_semantic_types_nonempty(self, integration_client): + response = integration_client.get("/get_semantic_types") + assert response.status_code == 200 + data = response.json() + types = data["semantic_types"]["types"] + assert len(types) > 0 + + def test_semantic_types_includes_gene_and_protein(self, integration_client): + response = integration_client.get("/get_semantic_types") + assert response.status_code == 200 + types = response.json()["semantic_types"]["types"] + assert "biolink:Gene" in types + assert "biolink:Protein" in types + + +# =========================================================================== +class TestGetCuriePrefixes: + + def test_get_all_prefixes_nonempty(self, integration_client): + response = integration_client.get("/get_curie_prefixes") + assert response.status_code == 200 + data = response.json() + assert len(data) > 0 + + def test_get_gene_prefixes_contains_ncbigene(self, integration_client): + response = integration_client.get( + "/get_curie_prefixes", + params={"semantic_type": ["biolink:Gene"]}, + ) + assert response.status_code == 200 + data = response.json() + assert "biolink:Gene" in data + prefixes = data["biolink:Gene"]["curie_prefix"] + assert "NCBIGene" in prefixes + + def test_post_gene_prefixes_contains_ncbigene(self, integration_client): + response = integration_client.post( + "/get_curie_prefixes", + json={"semantic_types": ["biolink:Gene"]}, + ) + assert response.status_code == 200 + data = response.json() + assert "biolink:Gene" in data + prefixes = data["biolink:Gene"]["curie_prefix"] + assert "NCBIGene" in prefixes + + def test_unknown_semantic_type_returns_empty_dict(self, integration_client): + response = integration_client.get( + "/get_curie_prefixes", + params={"semantic_type": ["biolink:NonExistentType"]}, + ) + assert response.status_code == 200 + assert response.json() == {} + + +# =========================================================================== +class TestGetSetId: + + def test_setid_is_deterministic(self, integration_client): + """Calling get_setid twice with the same CURIEs returns the same hash.""" + params = {"curie": [CANONICAL_GENE, SINGLE_ID_GENE]} + r1 = integration_client.get("/get_setid", params=params) + r2 = integration_client.get("/get_setid", params=params) + assert r1.status_code == 200 + assert r2.status_code == 200 + assert r1.json()["setid"] == r2.json()["setid"] + + def test_setid_normalizes_before_hashing(self, integration_client): + """ + ENSEMBL:LOC106478148 and NCBIGene:106478148 are equivalent. + Their setid should match the setid for just NCBIGene:106478148 since + normalization collapses them to the same canonical CURIE. + """ + r_canonical = integration_client.get( + "/get_setid", params={"curie": [CANONICAL_GENE]} + ) + r_non_canonical = integration_client.get( + "/get_setid", params={"curie": [NON_CANONICAL_GENE]} + ) + assert r_canonical.status_code == 200 + assert r_non_canonical.status_code == 200 + assert r_canonical.json()["setid"] == r_non_canonical.json()["setid"] + + def test_post_setid_multi_set(self, integration_client): + """POST /get_setid accepts a list of sets and returns a list of results.""" + payload = [ + {"curies": [CANONICAL_GENE]}, + {"curies": [SINGLE_ID_GENE, CANONICAL_GENE]}, + ] + response = integration_client.post("/get_setid", json=payload) + assert response.status_code == 200 + results = response.json() + assert isinstance(results, list) + assert len(results) == 2 + # The two sets are different, so their setids should differ + assert results[0]["setid"] != results[1]["setid"] + + def test_get_setid_no_params_returns_422(self, integration_client): + response = integration_client.get("/get_setid", params={"curie": []}) + assert response.status_code == 422 + + +# =========================================================================== +class TestQueryEndpoint: + + def test_query_with_known_gene_node(self, integration_client): + """Minimal TRAPI query containing a known gene CURIE should normalize it.""" + payload = { + "message": { + "query_graph": { + "nodes": {"n0": {"ids": [CANONICAL_GENE]}}, + "edges": {}, + }, + "knowledge_graph": { + "nodes": { + CANONICAL_GENE: { + "categories": ["biolink:Gene"], + "name": "LOC106478148", + } + }, + "edges": {}, + }, + "results": [], + } + } + response = integration_client.post("/query", json=payload) + assert response.status_code == 200 + + def test_query_with_empty_knowledge_graph(self, integration_client): + """TRAPI with empty knowledge graph should return 200.""" + payload = { + "message": { + "query_graph": {"nodes": {}, "edges": {}}, + "knowledge_graph": {"nodes": {}, "edges": {}}, + "results": [], + } + } + response = integration_client.post("/query", json=payload) + assert response.status_code == 200 + + +# =========================================================================== +class TestGetAllowedConflations: + + def test_conflations_list_contains_expected_types(self, integration_client): + response = integration_client.get("/get_allowed_conflations") + assert response.status_code == 200 + conflations = response.json()["conflations"] + assert "GeneProtein" in conflations + assert "DrugChemical" in conflations From ca9f64771b103514ed34110005f60c7ac9cdd290 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:13:00 -0500 Subject: [PATCH 03/16] Allow NodeLoader to accept an optional config file path. NodeLoader.__init__ and get_config now accept an optional config_file parameter (defaults to the project-root config.json, preserving existing behavior). This removes the need for monkeypatching in tests and lets the integration test fixture and CI loading step pass tests/data/config.json directly. Also adds tests/data/config.json pointing at the Gene/Protein compendia and GeneProtein conflation under tests/data/, and wires it into the integration_client fixture and the GitHub Actions loading step. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/test.yml | 16 ++++++++-------- node_normalizer/loader.py | 13 +++++++------ tests/conftest.py | 15 ++++++++------- tests/data/config.json | 17 +++++++++++++++++ 4 files changed, 40 insertions(+), 21 deletions(-) create mode 100644 tests/data/config.json diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 91f143c..2238349 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,14 +48,14 @@ jobs: - name: Install dependencies run: pip install -r requirements.txt - # PLACEHOLDER: create tests/data/config.json and load test data into Redis - # Example (fill in once tests/data/config.json is finalized): - # python -c " - # import asyncio - # from node_normalizer.loader import NodeLoader - # loader = NodeLoader('tests/data/config.json') - # asyncio.run(loader.load(100_000)) - # " + - name: Load test data into Redis + run: | + python -c " + import asyncio + from node_normalizer.loader import NodeLoader + loader = NodeLoader('tests/data/config.json') + asyncio.run(loader.load(100_000)) + " - name: Run integration tests run: pytest -v -m "integration" --tb=short diff --git a/node_normalizer/loader.py b/node_normalizer/loader.py index 6c4e7a0..1bc57b4 100644 --- a/node_normalizer/loader.py +++ b/node_normalizer/loader.py @@ -4,7 +4,7 @@ from pathlib import Path from itertools import islice from datetime import datetime -from typing import Dict, Any +from typing import Dict, Any, Optional import json import hashlib from itertools import combinations @@ -26,8 +26,8 @@ class NodeLoader: a redis database. """ - def __init__(self): - self._config = self.get_config() + def __init__(self, config_file: Optional[Path] = None): + self._config = self.get_config(config_file) self._compendium_directory: Path = Path(self._config["compendium_directory"]) self._conflation_directory: Path = Path(self._config["conflation_directory"]) @@ -58,11 +58,12 @@ def get_ancestors(self, input_type): return ancs @staticmethod - def get_config() -> Dict[str, Any]: + def get_config(config_file: Optional[Path] = None) -> Dict[str, Any]: """get configuration file""" - cname = Path(__file__).parents[1] / "config.json" + if config_file is None: + config_file = Path(__file__).parents[1] / "config.json" - with open(cname, "r") as json_file: + with open(config_file, "r") as json_file: data = json.load(json_file) return data diff --git a/tests/conftest.py b/tests/conftest.py index 157ab79..25f2859 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,8 @@ import os import time import socket +import asyncio +from pathlib import Path import pytest import logging @@ -81,13 +83,12 @@ def integration_client(): "Start it with: docker compose -f docker-compose-redis.yml up -d" ) from exc - # PLACEHOLDER: load test data into Redis before starting the app. - # Fill this in once tests/data/config.json is finalized, e.g.: - # - # import asyncio - # from node_normalizer.loader import NodeLoader - # loader = NodeLoader("tests/data/config.json") - # asyncio.run(loader.load(100_000)) + # Load test data into Redis using the test-specific config. + from node_normalizer.loader import NodeLoader + + test_config_path = Path(__file__).parent / "data" / "config.json" + loader = NodeLoader(test_config_path) + asyncio.run(loader.load(100_000)) from node_normalizer.server import app with TestClient(app) as client: diff --git a/tests/data/config.json b/tests/data/config.json new file mode 100644 index 0000000..4408cb4 --- /dev/null +++ b/tests/data/config.json @@ -0,0 +1,17 @@ +{ + "compendium_directory": "tests/data/compendia", + "conflation_directory": "tests/data/conflation", + "data_files": [ + "Gene.txt", + "Protein.txt" + ], + "test_mode": 0, + "debug_messages": 1, + "conflations": [ + { + "types": ["biolink:Gene", "biolink:Protein"], + "file": "GeneProtein.txt", + "redis_db": "gene_protein_db" + } + ] +} From a90e7afdd69ae099e367b4089eec92f201c547bc Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:22:57 -0500 Subject: [PATCH 04/16] Fix bmt API rename and mark integration tests xfail. bmt.utils.format_element was renamed to bmt.util.format in the installed version (bmt==1.4.3); update the import in loader.py and normalizer.py. Integration tests are marked xfail because bmt-lite raises ValueError when Toolkit() is called with a schema URL in startup_event (server.py:75). Add tests/README.md documenting this and the unawaited-coroutine issue in test_loader.py::test_nn_load. Co-Authored-By: Claude Sonnet 4.6 --- node_normalizer/loader.py | 2 +- node_normalizer/normalizer.py | 2 +- tests/README.md | 47 +++++++++++++++++++++++++++++++++++ tests/test_integration.py | 11 +++++++- 4 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 tests/README.md diff --git a/node_normalizer/loader.py b/node_normalizer/loader.py index 1bc57b4..48b6ff5 100644 --- a/node_normalizer/loader.py +++ b/node_normalizer/loader.py @@ -12,7 +12,7 @@ import os from .redis_adapter import RedisConnectionFactory, RedisConnection from bmt import Toolkit -from bmt.utils import format_element as bmt_format +from bmt.util import format as bmt_format from .util import LoggingUtil diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py index 5f92e2f..b27e692 100644 --- a/node_normalizer/normalizer.py +++ b/node_normalizer/normalizer.py @@ -12,7 +12,7 @@ import traceback from typing import List, Dict, Optional, Any, Set, Tuple, Union from uuid import UUID -from bmt.utils import format_element as bmt_format +from bmt.util import format as bmt_format from fastapi import FastAPI from reasoner_pydantic import KnowledgeGraph, Message, QueryGraph, Result, CURIE, Attribute diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..edf4126 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,47 @@ +# Test Suite Notes + +## Running tests + +```bash +pytest -m "not integration" # unit tests — no Redis needed +pytest -m "integration" # integration tests — requires Redis on localhost:6379 +``` + +Start Redis locally with: +```bash +docker compose -f docker-compose-redis.yml up -d +``` + +--- + +## Tests that do not currently pass + +### All integration tests (`tests/test_integration.py`) + +**Status:** `xfail` (expected failure) + +**Root cause:** `bmt==1.4.3` (installed in this environment) is a "lite" variant that +raises `ValueError: bmt-lite does not support the 'schema' argument` when `server.py` +calls `Toolkit(BIOLINK_MODEL_URL)` during `startup_event`. The integration test fixture +(`integration_client` in `conftest.py`) starts the app via `TestClient`, which triggers +`startup_event`, so all 28 integration tests fail at setup before any test body runs. + +**Estimated fix:** Determine the correct `bmt` version that supports passing a schema URL +to `Toolkit()`, update `requirements.txt`, and re-run. If no such version is easily +available, the alternative is to make the Biolink Model URL optional in `startup_event` +and fall back to `Toolkit()` (no URL) for local/test use. Probably 30–60 minutes of work. + +--- + +### `tests/test_loader.py::test_nn_load` + +**Status:** Passes, but emits a `RuntimeWarning: coroutine 'NodeLoader.load_compendium' was never awaited`. + +**Root cause:** `load_compendium` is an `async` method but `test_nn_load` calls it +synchronously: `assert node_loader.load_compendium(good_json, 5)`. This passes today +only because the return value of an unawaited coroutine is truthy. The actual loading +logic never executes. + +**Estimated fix:** Refactor the test to `await` the call, or add a synchronous wrapper. +Low effort (~5 minutes) but requires understanding whether the test was intentionally +bypassing the async path via `_test_mode = 1`. diff --git a/tests/test_integration.py b/tests/test_integration.py index 8cea6f0..c9689c3 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -18,7 +18,16 @@ import pytest -pytestmark = pytest.mark.integration +pytestmark = [ + pytest.mark.integration, + pytest.mark.xfail( + reason=( + "bmt-lite incompatibility: installed bmt==1.4.3 raises ValueError when " + "Toolkit() is passed a schema URL (server.py:75). See tests/README.md." + ), + strict=False, + ), +] # --------------------------------------------------------------------------- # Canonical gene CURIE with one identifier in its clique From 4ed9b456fb2ed7b5a1cc5f595a0b8c47c6933ff8 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:23:39 -0500 Subject: [PATCH 05/16] Increased bmt version to 1.4.6. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5b4eda1..fcd9b34 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ aioredis~=1.3.1 click -bmt==1.4.3 +bmt==1.4.6 deepdiff==8.6.1 fastapi~=0.108.0 httptools From a0502457f1d2e157122ed107944666950c145962 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:25:43 -0500 Subject: [PATCH 06/16] Reverted bmt to 1.4.3. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index fcd9b34..5b4eda1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ aioredis~=1.3.1 click -bmt==1.4.6 +bmt==1.4.3 deepdiff==8.6.1 fastapi~=0.108.0 httptools From f57dc8a858fbfc806db09e89c9ceada893a6ee39 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:26:06 -0500 Subject: [PATCH 07/16] Updated requirements.lock. --- requirements.lock | 95 +++++++++++++++-------------------------------- 1 file changed, 29 insertions(+), 66 deletions(-) diff --git a/requirements.lock b/requirements.lock index f793f90..5b03f60 100644 --- a/requirements.lock +++ b/requirements.lock @@ -1,86 +1,49 @@ aioredis==1.3.1 -annotated-types==0.7.0 anyio==3.7.1 -asgiref==3.8.1 +asgiref==3.7.2 async-timeout==4.0.3 -attrs==23.2.0 -bmt==1.4.3 +attrs==23.1.0 bmt-lite-v3.1.0==2.2.2 -bmt-lite-v3.6.0==2.3.0 -certifi==2024.6.2 -charset-normalizer==3.3.2 -click==8.1.7 -coverage==7.5.3 -curies==0.7.10 +certifi==2023.7.22 +charset-normalizer==2.1.1 +click==8.0.4 +coverage==7.3.2 deepdiff==5.8.1 -Deprecated==1.2.14 deprecation==2.1.0 -docker==7.1.0 -fastapi==0.108.0 -googleapis-common-protos==1.59.1 -grpcio==1.64.1 -gunicorn==22.0.0 +docker==6.1.3 +fastapi==0.83.0 +gunicorn==20.1.0 h11==0.12.0 -hbreader==0.9.1 -hiredis==2.3.2 -httpcore==0.15.0 -httptools==0.6.1 -httpx==0.23.0 -idna==3.7 -importlib-metadata==6.11.0 +hiredis==2.2.3 +httpcore==0.13.7 +httptools==0.5.0 +httpx==0.19.0 +idna==3.4 iniconfig==2.0.0 -isodate==0.6.1 -json-flattener==0.1.9 -jsonasobj2==1.0.4 jsonschema==4.6.2 -linkml-runtime==1.8.0 -opentelemetry-api==1.21.0 -opentelemetry-exporter-jaeger==1.21.0 -opentelemetry-exporter-jaeger-proto-grpc==1.21.0 -opentelemetry-exporter-jaeger-thrift==1.21.0 -opentelemetry-instrumentation==0.42b0 -opentelemetry-instrumentation-asgi==0.42b0 -opentelemetry-instrumentation-fastapi==0.42b0 -opentelemetry-instrumentation-httpx==0.42b0 -opentelemetry-sdk==1.21.0 -opentelemetry-semantic-conventions==0.42b0 -opentelemetry-util-http==0.42b0 ordered-set==4.1.0 -orjson==3.9.15 -packaging==24.1 -pluggy==1.5.0 -prefixcommons==0.1.12 -prefixmaps==0.2.4 -protobuf==4.25.3 +orjson==3.8.10 +packaging==23.2 +pluggy==1.3.0 py==1.11.0 -pydantic==1.10.16 -pydantic_core==2.18.4 -pyparsing==3.1.2 -pyrsistent==0.20.0 +pydantic==1.10.13 +pyrsistent==0.19.3 pytest==6.2.5 pytest-asyncio==0.18.3 pytest-cov==3.0.0 -pytest-logging==2015.11.4 -PyTrie==0.4.0 -PyYAML==6.0.1 -rdflib==7.0.0 -reasoner-pydantic==4.1.5 +PyYAML==6.0 +reasoner-pydantic==4.0.8 redis==3.5.3 redis-py-cluster==2.1.3 -requests==2.31.0 +requests==2.28.1 rfc3986==1.5.0 -setuptools==70.1.0 -six==1.16.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -starlette==0.32.0.post1 -stringcase==1.2.0 +sniffio==1.3.0 +starlette==0.19.1 testcontainers==3.6.1 -thrift==0.20.0 toml==0.10.2 -typing_extensions==4.12.2 -urllib3==2.2.2 +typing_extensions==4.8.0 +urllib3==1.26.17 uvicorn==0.17.6 -uvloop==0.19.0 -wrapt==1.16.0 -zipp==3.19.2 +uvloop==0.17.0 +websocket-client==1.6.4 +wrapt==1.15.0 From 5fe397de00be538d5eb98d703f40f0b9ef464096 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:27:09 -0500 Subject: [PATCH 08/16] Removed pinned versions from requirements.txt. I just don't like 'em. --- requirements.txt | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5b4eda1..9bf5784 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,27 +1,27 @@ -aioredis~=1.3.1 +aioredis click -bmt==1.4.3 -deepdiff==8.6.1 -fastapi~=0.108.0 +bmt +deepdiff +fastapi httptools -jsonschema~=4.6.0 -pytest==6.2.5 -pytest-cov==3.0.0 -pytest-asyncio==0.18.3 -pyyaml~=6.0 -reasoner-pydantic==4.1.5 -redis~=3.5.3 -redis-py-cluster==2.1.3 -requests==2.32.4 -testcontainers==3.6.1 +jsonschema +pytest +pytest-cov +pytest-asyncio +pyyaml +reasoner-pydantic +redis +redis-py-cluster +requests +testcontainers uvicorn uvloop -gunicorn==23.0.0 -orjson==3.9.15 -httpx==0.23.0 +gunicorn +orjson +httpx # To support Open Telemetry -opentelemetry-sdk==1.27.0 -opentelemetry-exporter-otlp-proto-grpc==1.27.0 -opentelemetry-instrumentation-fastapi==0.48b0 -opentelemetry-instrumentation-httpx==0.48b0 +opentelemetry-sdk +opentelemetry-exporter-otlp-proto-grpc +opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-httpx From 446d8bc088e2ff140f671e3b1797b064d9e43988 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:27:49 -0500 Subject: [PATCH 09/16] Updated requirements.txt. --- requirements.lock | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/requirements.lock b/requirements.lock index 5b03f60..15ec71b 100644 --- a/requirements.lock +++ b/requirements.lock @@ -3,47 +3,78 @@ anyio==3.7.1 asgiref==3.7.2 async-timeout==4.0.3 attrs==23.1.0 +bmt==1.4.6 bmt-lite-v3.1.0==2.2.2 certifi==2023.7.22 charset-normalizer==2.1.1 -click==8.0.4 +click==8.3.1 coverage==7.3.2 +curies==0.7.10 deepdiff==5.8.1 +Deprecated==1.3.1 deprecation==2.1.0 docker==6.1.3 fastapi==0.83.0 +googleapis-common-protos==1.72.0 +grpcio==1.78.0 gunicorn==20.1.0 h11==0.12.0 +hbreader==0.9.1 hiredis==2.2.3 httpcore==0.13.7 httptools==0.5.0 httpx==0.19.0 idna==3.4 +importlib_metadata==8.7.1 iniconfig==2.0.0 +json-flattener==0.1.9 +jsonasobj2==1.0.4 jsonschema==4.6.2 +linkml-runtime==1.10.0 +opentelemetry-api==1.39.1 +opentelemetry-exporter-otlp-proto-common==1.39.1 +opentelemetry-exporter-otlp-proto-grpc==1.39.1 +opentelemetry-instrumentation==0.60b1 +opentelemetry-instrumentation-asgi==0.60b1 +opentelemetry-instrumentation-fastapi==0.60b1 +opentelemetry-instrumentation-httpx==0.60b1 +opentelemetry-proto==1.39.1 +opentelemetry-sdk==1.39.1 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-util-http==0.60b1 ordered-set==4.1.0 orjson==3.8.10 packaging==23.2 pluggy==1.3.0 +prefixcommons==0.1.12 +prefixmaps==0.2.6 +protobuf==6.33.5 py==1.11.0 pydantic==1.10.13 +pyparsing==3.3.2 pyrsistent==0.19.3 pytest==6.2.5 pytest-asyncio==0.18.3 pytest-cov==3.0.0 +pytest-logging==2015.11.4 +PyTrie==0.4.0 PyYAML==6.0 +rdflib==7.6.0 reasoner-pydantic==4.0.8 redis==3.5.3 redis-py-cluster==2.1.3 requests==2.28.1 rfc3986==1.5.0 sniffio==1.3.0 +sortedcontainers==2.4.0 starlette==0.19.1 +stringcase==1.2.0 testcontainers==3.6.1 toml==0.10.2 -typing_extensions==4.8.0 +typing_extensions==4.15.0 urllib3==1.26.17 uvicorn==0.17.6 uvloop==0.17.0 websocket-client==1.6.4 wrapt==1.15.0 +zipp==3.23.0 From 47ff36ae4c8e544fdc09e4847258021e3f74136e Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:34:39 -0500 Subject: [PATCH 10/16] Remove bmt-lite conflict; all integration tests now pass. bmt-lite-v3.1.0 was overwriting bmt/__init__.py in the shared namespace, causing Toolkit(url) to raise ValueError. Fix: uninstall bmt-lite and reinstall bmt cleanly. This also lets us revert the bmt.utils->bmt.util rename (format_element is back), and remove the xfail marker from all integration tests. Also fix test_unknown_semantic_type: the endpoint returns a "Not found" sentinel for unknown types rather than an empty dict; update the assertion to match the real behavior. Co-Authored-By: Claude Sonnet 4.6 --- node_normalizer/loader.py | 2 +- node_normalizer/normalizer.py | 2 +- tests/README.md | 17 ----------------- tests/test_integration.py | 18 ++++++------------ 4 files changed, 8 insertions(+), 31 deletions(-) diff --git a/node_normalizer/loader.py b/node_normalizer/loader.py index 48b6ff5..1bc57b4 100644 --- a/node_normalizer/loader.py +++ b/node_normalizer/loader.py @@ -12,7 +12,7 @@ import os from .redis_adapter import RedisConnectionFactory, RedisConnection from bmt import Toolkit -from bmt.util import format as bmt_format +from bmt.utils import format_element as bmt_format from .util import LoggingUtil diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py index b27e692..5f92e2f 100644 --- a/node_normalizer/normalizer.py +++ b/node_normalizer/normalizer.py @@ -12,7 +12,7 @@ import traceback from typing import List, Dict, Optional, Any, Set, Tuple, Union from uuid import UUID -from bmt.util import format as bmt_format +from bmt.utils import format_element as bmt_format from fastapi import FastAPI from reasoner_pydantic import KnowledgeGraph, Message, QueryGraph, Result, CURIE, Attribute diff --git a/tests/README.md b/tests/README.md index edf4126..4e234df 100644 --- a/tests/README.md +++ b/tests/README.md @@ -16,23 +16,6 @@ docker compose -f docker-compose-redis.yml up -d ## Tests that do not currently pass -### All integration tests (`tests/test_integration.py`) - -**Status:** `xfail` (expected failure) - -**Root cause:** `bmt==1.4.3` (installed in this environment) is a "lite" variant that -raises `ValueError: bmt-lite does not support the 'schema' argument` when `server.py` -calls `Toolkit(BIOLINK_MODEL_URL)` during `startup_event`. The integration test fixture -(`integration_client` in `conftest.py`) starts the app via `TestClient`, which triggers -`startup_event`, so all 28 integration tests fail at setup before any test body runs. - -**Estimated fix:** Determine the correct `bmt` version that supports passing a schema URL -to `Toolkit()`, update `requirements.txt`, and re-run. If no such version is easily -available, the alternative is to make the Biolink Model URL optional in `startup_event` -and fall back to `Toolkit()` (no URL) for local/test use. Probably 30–60 minutes of work. - ---- - ### `tests/test_loader.py::test_nn_load` **Status:** Passes, but emits a `RuntimeWarning: coroutine 'NodeLoader.load_compendium' was never awaited`. diff --git a/tests/test_integration.py b/tests/test_integration.py index c9689c3..e59fa3d 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -18,16 +18,7 @@ import pytest -pytestmark = [ - pytest.mark.integration, - pytest.mark.xfail( - reason=( - "bmt-lite incompatibility: installed bmt==1.4.3 raises ValueError when " - "Toolkit() is passed a schema URL (server.py:75). See tests/README.md." - ), - strict=False, - ), -] +pytestmark = pytest.mark.integration # --------------------------------------------------------------------------- # Canonical gene CURIE with one identifier in its clique @@ -259,13 +250,16 @@ def test_post_gene_prefixes_contains_ncbigene(self, integration_client): prefixes = data["biolink:Gene"]["curie_prefix"] assert "NCBIGene" in prefixes - def test_unknown_semantic_type_returns_empty_dict(self, integration_client): + def test_unknown_semantic_type_returns_not_found(self, integration_client): response = integration_client.get( "/get_curie_prefixes", params={"semantic_type": ["biolink:NonExistentType"]}, ) assert response.status_code == 200 - assert response.json() == {} + data = response.json() + # Unknown types are returned with a "Not found" sentinel rather than omitted + assert "biolink:NonExistentType" in data + assert "Not found" in str(data["biolink:NonExistentType"]) # =========================================================================== From 59e645791fbc9b468a4cf8967aca6a10bced63f5 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:39:14 -0500 Subject: [PATCH 11/16] Pin aioredis<2 to avoid Python 3.11 incompatibility. aioredis 2.x defines TimeoutError(asyncio.TimeoutError, builtins.TimeoutError) which raises TypeError on Python 3.11+ where the two are the same type. The codebase uses the aioredis 1.x API (create_redis_pool, aioredis.commands.Redis) so 2.x would also break at runtime. Co-Authored-By: Claude Sonnet 4.6 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9bf5784..8f4e866 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -aioredis +aioredis<2 click bmt deepdiff From 09b0306eb7da5ded7f61a34c6155ab92d8d6c518 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:40:07 -0500 Subject: [PATCH 12/16] Removed on:push -- we only need to test every PR. --- .github/workflows/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2238349..2201da0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,7 +1,6 @@ name: Tests on: - push: pull_request: jobs: From d93bb9458c9216654833d89c5c54d22b185e28b3 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:42:06 -0500 Subject: [PATCH 13/16] Pin pydantic<2, fastapi<0.100, reasoner-pydantic<5. Without pins, CI installs pydantic v2 + matching newer fastapi/reasoner-pydantic. The codebase uses pydantic v1 APIs (class-based Config, min_items, etc.) and reasoner-pydantic 4.x, which are incompatible with pydantic v2. fastapi 0.100+ dropped pydantic v1 support. Co-Authored-By: Claude Sonnet 4.6 --- requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8f4e866..241737d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,14 +2,15 @@ aioredis<2 click bmt deepdiff -fastapi +fastapi<0.100 httptools jsonschema pytest pytest-cov pytest-asyncio pyyaml -reasoner-pydantic +pydantic<2 +reasoner-pydantic<5 redis redis-py-cluster requests From d66e266891e66747cbdb22effa8aaa6a841b8328 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:45:04 -0500 Subject: [PATCH 14/16] Pin fastapi==0.83.0 to fix starlette TestClient breakage. fastapi<0.100 was too broad: fastapi 0.99.x pulls in starlette 0.27+ which uses httpx as the TestClient backend. A newer httpx then dropped the app= kwarg, breaking TestClient(app) in all tests. fastapi==0.83.0 pins starlette==0.19.1 (requests-based TestClient), which is the version the test suite was written against. Co-Authored-By: Claude Sonnet 4.6 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 241737d..a3f898d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ aioredis<2 click bmt deepdiff -fastapi<0.100 +fastapi==0.83.0 httptools jsonschema pytest From 30008047e4aa99c72a3af713c2ef39614c4c5762 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:46:08 -0500 Subject: [PATCH 15/16] Updated requirements.lock. --- requirements.lock | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.lock b/requirements.lock index 15ec71b..ecbc2e3 100644 --- a/requirements.lock +++ b/requirements.lock @@ -4,7 +4,6 @@ asgiref==3.7.2 async-timeout==4.0.3 attrs==23.1.0 bmt==1.4.6 -bmt-lite-v3.1.0==2.2.2 certifi==2023.7.22 charset-normalizer==2.1.1 click==8.3.1 From 8c7aae798ab321e45be629825de89a47e4c6c471 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 27 Feb 2026 01:50:09 -0500 Subject: [PATCH 16/16] Pin anyio<4 to fix start_blocking_portal removal. starlette 0.19.1 TestClient calls anyio.start_blocking_portal(), which was removed in anyio 4.x. CI was getting anyio 4.12.1 while the locally working version is 3.7.1. Co-Authored-By: Claude Sonnet 4.6 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index a3f898d..63d6788 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ aioredis<2 click bmt deepdiff +anyio<4 fastapi==0.83.0 httptools jsonschema