Skip to content

Commit 6de1413

Browse files
committed
Adds new tests for map processor.
1 parent bf498ba commit 6de1413

File tree

3 files changed

+124
-4
lines changed

3 files changed

+124
-4
lines changed

‎mw/xml_dump/map.py‎

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import logging
2-
from multiprocessing import cpu_count, Queue, Value
2+
from multiprocessing import Queue, Value, cpu_count
33
from queue import Empty
44

55
from .functions import file
66
from .processor import DONE, Processor
77

8-
logger = logging.getLogger("mw.dump.map")
8+
logger = logging.getLogger("mw.xml_dump.map")
99

1010

1111
def re_raise(error, path):
@@ -57,7 +57,7 @@ def page_info(dump, path):
5757
outputq = Queue(maxsize=output_buffer)
5858
running = Value('i', 0)
5959
threads = max(1, min(int(threads), pathsq.qsize()))
60-
60+
6161
processors = []
6262

6363
for i in range(0, threads):
@@ -86,7 +86,6 @@ def page_info(dump, path):
8686
error, path = item
8787
re_raise(error, path)
8888

89-
9089
def queue_files(paths):
9190
"""
9291
Produces a `multiprocessing.Queue` containing path for each value in

‎mw/xml_dump/tests/test_map.py‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import io
22

33
from nose.tools import eq_, raises
4+
45
from ..map import map
56

67

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import io
2+
from multiprocessing import Queue
3+
4+
from nose.tools import eq_, raises
5+
6+
from ..processor import DONE, Processor
7+
8+
9+
SAMPLE_XML = """
10+
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/"
11+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
12+
xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.8/
13+
http://www.mediawiki.org/xml/export-0.8.xsd"
14+
version="0.8" xml:lang="en">
15+
<siteinfo>
16+
<sitename>Wikipedia</sitename>
17+
<base>http://en.wikipedia.org/wiki/Main_Page</base>
18+
<generator>MediaWiki 1.22wmf2</generator>
19+
<case>first-letter</case>
20+
<namespaces>
21+
<namespace key="0" case="first-letter" />
22+
<namespace key="1" case="first-letter">Talk</namespace>
23+
</namespaces>
24+
</siteinfo>
25+
<page>
26+
<title>Foo</title>
27+
<ns>0</ns>
28+
<id>1</id>
29+
<revision>
30+
<id>1</id>
31+
<timestamp>2004-08-09T09:04:08Z</timestamp>
32+
<contributor>
33+
<username>Gen0cide</username>
34+
<id>92182</id>
35+
</contributor>
36+
<text xml:space="preserve">Revision 1 text</text>
37+
<sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
38+
<model>wikitext</model>
39+
<format>text/x-wiki</format>
40+
</revision>
41+
<revision>
42+
<id>2</id>
43+
<timestamp>2004-08-10T09:04:08Z</timestamp>
44+
<contributor>
45+
<ip>222.152.210.109</ip>
46+
</contributor>
47+
<text xml:space="preserve">Revision 2 text</text>
48+
<sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
49+
<model>wikitext</model>
50+
<comment>Comment 2</comment>
51+
<format>text/x-wiki</format>
52+
</revision>
53+
</page>
54+
<page>
55+
<title>Bar</title>
56+
<ns>1</ns>
57+
<id>2</id>
58+
<revision>
59+
<id>3</id>
60+
<timestamp>2004-08-11T09:04:08Z</timestamp>
61+
<contributor>
62+
<ip>222.152.210.22</ip>
63+
</contributor>
64+
<text xml:space="preserve">Revision 3 text</text>
65+
<sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
66+
<model>wikitext</model>
67+
<format>text/x-wiki</format>
68+
</revision>
69+
</page>
70+
</mediawiki>"""
71+
72+
73+
74+
def test_processor():
    """Check that a Processor emits every page id followed by DONE.

    A single in-memory copy of SAMPLE_XML (two pages, ids 1 and 2) is
    queued as the only "path".  The mapping function yields each page's
    id, so the output queue is expected to deliver, in order, the values
    1, 2 and finally the DONE sentinel -- each paired with a falsy error
    flag.
    """
    def process_dump(dump, path):
        # Emit the id of every page found in the dump.
        for page in dump:
            yield page.id

    paths = Queue()
    paths.put(io.StringIO(SAMPLE_XML))
    results = Queue()

    worker = Processor(paths, results, process_dump)
    worker.start()

    # Each queue entry unpacks as (error, item); none should be an error.
    for expected in (1, 2, DONE):
        failed, value = results.get()
        assert not failed
        eq_(value, expected)
101+
def test_processor_error():
    """Check that a failing mapping function is reported, then DONE.

    The mapping function raises unconditionally.  The first entry read
    from the output queue must carry a truthy error flag; the second
    must be a non-error entry whose item is the DONE sentinel.
    """
    def process_dump(dump, path):
        # Always fail so the error-reporting path is exercised.
        raise Exception("foo")

    paths = Queue()
    paths.put(io.StringIO(SAMPLE_XML))
    results = Queue()

    worker = Processor(paths, results, process_dump)
    worker.start()

    # First entry: the error raised by process_dump.
    failed, _ = results.get()
    assert failed

    # Second entry: the normal completion marker.
    failed, value = results.get()
    assert not failed
    eq_(value, DONE)

0 commit comments

Comments
 (0)