Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,6 @@ cython_debug/
# static files generated from Django application using `collectstatic`
media
static

# Don't include the creds file
neo4jcreds.json
39 changes: 22 additions & 17 deletions modules/DrugSynonymDataToNeo4j.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,32 @@

logger = logging.getLogger('ds-neo4j')

def dict_to_property_str(properties:Optional[dict] = None) -> str:

def dict_to_property_str(properties: Optional[dict] = None) -> str:

def property_type_checker(property_value):
if isinstance(property_value,int) or isinstance(property_value,float):
if isinstance(property_value, int) or isinstance(property_value, float):
pass
elif isinstance(property_value,str):
property_value = '''"''' + property_value.replace('"',r"\"") + '''"'''
elif isinstance(property_value, str):
property_value = '''"''' + \
property_value.replace('"', r"\"") + '''"'''
elif not property_value:
property_value = "''"
return property_value

resp:str = ""
resp: str = ""
if properties:
resp = "{"
for key in properties.keys():
resp += """{key}:{value},""".format(key=key,value=property_type_checker(properties[key]))
resp += """{key}:{value},""".format(key=key,
value=property_type_checker(properties[key]))
resp = resp[:-1] + "}"
return resp

def cypher_template_filler(cypher_template:str,data:dict) -> str:
return cypher_template.format(**data).replace("\n","")

def cypher_template_filler(cypher_template: str, data: dict) -> str:
return cypher_template.format(**data).replace("\n", "")


class DrugSynonymDataToNeo4j(object):

Expand All @@ -47,15 +52,14 @@ def close(self):

@staticmethod
def _merge_node(tx, node_type, properties:Optional[dict] = None):



data:dict = {
"node_type":node_type,
"properties":dict_to_property_str(properties)
}
base_cypher = """
MERGE (n:{node_type} {properties})
ON CREATE SET n.record_created_at=timestamp()
ON MATCH SET n.record_updated_at=timestamp()
RETURN id(n)
"""

Expand Down Expand Up @@ -126,18 +130,19 @@ def merge_drugs_synonyms_and_link_between(self,drug_vocab):
if isinstance(drug_vocab[drug],list):
for synonym in drug_vocab[drug]:
node_type = "Synonym"
properties:dict = {
"name":synonym
properties: dict = {
"name": synonym
}
synonym_id = session.write_transaction(node_merging_func, node_type, properties)
self.drug_or_synonym_name_and_neo4j_id_pairs[synonym] = synonym_id
count_node += 1

edge_type = "KNOWN_AS"
session.write_transaction(edge_merging_func, drug_id, synonym_id, edge_type)
session.write_transaction(
edge_merging_func, drug_id, synonym_id, edge_type)
count_edge += 1
if count_node > prev_count_node + 1000 or count_edge > prev_count_edge + 1000:

if count_node > prev_count_node + 1000 or count_edge > prev_count_edge + 1000:
prev_count_node = count_node
prev_count_edge = count_edge
logger.info("> {} nodes and {} edges already merged".format(count_node,count_edge))
Expand Down Expand Up @@ -210,4 +215,4 @@ def _parse_url(url:str):
'password': parsed.password,
'hostname': parsed.hostname,
'port': parsed.port,
}
}
6 changes: 4 additions & 2 deletions modules/IngestDrugSynonymsWF.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@

from IngestDrugSynonyms import IngestDrugSynonyms
from DrugSynonymDataToNeo4j import DrugSynonymDataToNeo4j
from modules.Neo4jDataAccess import Neo4jDataAccess

drugSynonym = IngestDrugSynonyms()
drugSynonym.auto_get_and_clean_data()

neo4jBridge = DrugSynonymDataToNeo4j()
neo4jBridge = DrugSynonymDataToNeo4j(
graph=Neo4jDataAccess().get_neo4j_graph(Neo4jDataAccess.RoleType.WRITER))
neo4jBridge.upload_drugs_and_synonyms(drugSynonym.drug_vocab)
neo4jBridge.upload_studies(drugSynonym.all_studies_df)
neo4jBridge.upload_studies(drugSynonym.all_studies_df)
9 changes: 9 additions & 0 deletions modules/Neo4jDataAccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ class RelationshipLabel(enum.Enum):
RETWEETED = 'RETWEETED'
INCLUDES = 'INCLUDES'

class RoleType(enum.Enum):
READER = 'reader'
WRITER = 'writer'

def __init__(self, debug=False, neo4j_creds=None, batch_size=2000, timeout="60s"):
self.creds = neo4j_creds
self.debug = debug
Expand Down Expand Up @@ -245,6 +249,11 @@ def get_tweet_by_id(self, df: pd.DataFrame, cols=[]):
raise TypeError(
'Parameter df must be a DataFrame with a column named "id" ')

def get_neo4j_graph(self, role_type: RoleType):
if not isinstance(role_type, self.RoleType):
raise TypeError('The role_type parameter is not of type RoleType')
return self.__get_neo4j_graph(role_type.value)

def save_enrichment_df_to_graph(self, label: NodeLabel, df: pd.DataFrame, job_name: str, job_id=None):
if not isinstance(label, self.NodeLabel):
raise TypeError('The label parameter is not of type NodeType')
Expand Down
12 changes: 12 additions & 0 deletions pipelines/Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
prefect = "*"

[requires]
python_version = "3.7"
Loading