Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat(topic tracker): match twint tracker
  • Loading branch information
lmeyerov committed Dec 24, 2020
commit bf6435099e5f0d155b92dbbfac421e4e162ade8b
11 changes: 8 additions & 3 deletions infra/pipelines/docker/datastream-Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,22 @@ RUN apt-get update \
RUN pip install prefect==0.10.1 simplejson twarc neo4j boto3==1.12.39 \
pandas pyarrow urlextract git+https://github.com/homm/yurl.git@1943161973aeb3b3cf2e1e9de6671673b8356161

RUN echo "ok6" && pip install git+https://github.com/TheDataRideAlongs/twint.git
#RUN pip install git+https://github.com/twintproject/twint.git
#RUN echo "ok6" && pip install git+https://github.com/TheDataRideAlongs/twint.git
RUN pip install git+https://github.com/twintproject/twint.git
#git+https://github.com/lmeyerov/twint.git@patch-1#egg=twint
#RUN pip install git+https://github.com/himanshudabas/twint.git@twint-fixes#egg=twint

#https://github.com/twintproject/twint/issues/1061
#RUN pip install git+git://github.com/ajctrl/twint@patch-1


COPY ./modules /modules/ProjectDomino
COPY ./infra/pipelines/docker/datastream-entrypoint.sh /entrypoint.sh

ENV JOB_FILE=search_by_date_job.py
ENV TOPIC=covid

ENTRYPOINT ["/entrypoint.sh"]
#ENTRYPOINT ["/bin/bash", "-c", 'echo "ok"']

HEALTHCHECK --interval=60s --timeout=15s --start-period=20s \
CMD curl -sf --socks5-hostname localhost:9050 https://check.torproject.org | grep Congrat
Expand Down
5 changes: 4 additions & 1 deletion infra/pipelines/docker/datastream-docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ services:
network_mode: 'bridge'
volumes:
- /home/lmeyerov2/neo4jcreds.json:/secrets/neo4jcreds.json:ro
- ./jobs:/app:ro
- ./../../../jobs:/app:ro
environment:
JOB_FILE: ${JOB_FILE:-search_by_date_job.py}
TOPIC: ${TOPIC:-covid}
PREFECT__SERVER__HOST: ${PREFECT__SERVER__HOST:-http://host.docker.internal}
PREFECT__SERVER__PORT: ${PREFECT__SERVER__PORT:-4200}
PREFECT__SERVER__UI__HOST: ${PREFECT__SERVER__UI__HOST:-http://host.docker.internal}
Expand All @@ -30,3 +31,5 @@ services:
logging:
options:
tag: 'ImageName:{{.ImageName}}/Name:{{.Name}}/ID:{{.ID}}/ImageFullID:{{.ImageFullID}}'
max-size: "20M"
max-file: "10"
2 changes: 1 addition & 1 deletion infra/pipelines/docker/datastream-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
set -ex

echo "Starting JOB_FILE ${JOB_FILE}"
echo "Starting JOB_FILE ${JOB_FILE}, maybe TOPIC ${TOPIC}"

service tor start

Expand Down
134 changes: 0 additions & 134 deletions infra/pipelines/docker/jobs/track_qanon1.py

This file was deleted.

7 changes: 7 additions & 0 deletions jobs/search_covid.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Rebuild and restart the "covid_tracker" docker-compose project.
#
# Steps, in order: tear the project down (also removing its volumes),
# rebuild its images, then start it detached. All three use
# datastream-docker-compose.yml and are run through sudo.
#
# Usage: search_covid.sh
set -ex

# Locate the compose directory relative to this script instead of the
# caller's working directory, so the script can be started from anywhere.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly script_dir
readonly compose_dir="${script_dir}/../infra/pipelines/docker"
readonly compose_file="datastream-docker-compose.yml"
readonly project_name="covid_tracker"

# Run one docker-compose subcommand for the project from inside the compose
# directory. The subshell keeps the directory change local to the call.
# Arguments: docker-compose subcommand and its options.
compose() {
  ( cd "${compose_dir}" \
    && sudo docker-compose -f "${compose_file}" -p "${project_name}" "$@" )
}

compose down -v
compose build
compose up -d

28 changes: 28 additions & 0 deletions jobs/search_topics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Restart one docker-compose tracker project per topic listed in topics.json.
#
# For every top-level key of topics.json:
#   1. tear down the project "${PROJECT}_<topic>" (also removing its volumes);
#   2. then, topic by topic: rebuild it, start it detached with TOPIC and
#      JOB_FILE set, and print the first N_CHECK log lines before moving on
#      to the next topic.
#
# Environment:
#   PROJECT_NAME  optional prefix for the compose project names
#                 (default: topictracker)
#
# Requires: jq, docker-compose.
set -ex

export JOB_FILE=track_topics.py
export PROJECT=${PROJECT_NAME:-topictracker}

echo "USING: JOB_FILE ${JOB_FILE}, PROJECT ${PROJECT}"

# Resolve topics.json and the compose directory relative to this script
# instead of the caller's working directory, so the script can be started
# from anywhere.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly script_dir
readonly compose_dir="${script_dir}/../infra/pipelines/docker"
readonly compose_file="datastream-docker-compose.yml"

# Read the topic list once, up front. Because this is a plain assignment, a
# jq failure (missing or malformed topics.json) aborts the script under
# `set -e`; when jq fed a `| while read` pipeline its exit status was
# discarded and the loops silently ran zero times. Holding the topics in an
# array also means the docker-compose calls below no longer inherit the
# topic list as their stdin.
topic_keys="$(jq -car 'keys[]' "${script_dir}/topics.json")"
topics=()
while read -r topic; do
  if [[ -n "${topic}" ]]; then
    topics+=("${topic}")
  fi
done <<<"${topic_keys}"

# Pass 1: tear down every topic's project before anything is restarted.
for topic in "${topics[@]}"; do
  ( cd "${compose_dir}" \
    && docker-compose -f "${compose_file}" -p "${PROJECT}_${topic}" down -v )
done

### Run for 30 lines before starting next
N_CHECK=30

# Pass 2: rebuild and start each topic, one at a time.
for topic in "${topics[@]}"; do
  echo "topic: ${topic}"
  compose=(docker-compose -f "${compose_file}" -p "${PROJECT}_${topic}")
  # `pipefail` is deliberately NOT enabled: `logs -f` is expected to be cut
  # off once `head` has printed N_CHECK lines, and that must not be treated
  # as a failure of the step. The step's status is therefore head's.
  ( cd "${compose_dir}" \
    && "${compose[@]}" build \
    && TOPIC="${topic}" JOB_FILE="${JOB_FILE}" "${compose[@]}" up -d \
    && ( "${compose[@]}" logs -f -t --tail="${N_CHECK}" | head -n "${N_CHECK}" ) ) \
    || { echo "exn" && exit 1; }
done
Loading