import pywikibot as bot
from pywikibot import pagegenerators as pg
import pprint

## TO DO ##
# In checking for number of keys, accept more than 1, but refuse those that contain disambig
# Save the database into a big json once a day?

# Sample surnames kept for ad-hoc checks; not referenced by the code below.
testNames = ['Engström', 'Zaugg', 'Blacher', 'Witkowski', 'Hjalmarsson']

# Wikidata site handle passed to the SPARQL page generator.
site = bot.Site('wikidata', 'wikidata')

# Load the SPARQL query and flatten it onto a single line. Newlines are
# replaced with a space (not removed) so that tokens sitting on adjacent
# lines cannot fuse, e.g. "SELECT ?item\nWHERE" must not become
# "SELECT ?itemWHERE".
with open('query.rq', 'r') as query_file:
    QUERY = query_file.read().replace('\n', ' ')
    print(QUERY)

# Yields one page object per item matched by QUERY.
generator = pg.WikidataSPARQLPageGenerator(QUERY, site=site)
legitNames = []         # labels of items carrying exactly one P31 claim
namesWithDisambig = []  # labels of every other item
namesToCreate = []      # left empty here

# Walk every item produced by the generator and file its label under
# legitNames (exactly one P31 claim) or namesWithDisambig (anything else).
counter = 0
for counter, item in enumerate(generator, start=1):
    # Progress indicator: one line per 100 items seen, including skipped ones.
    if counter % 100 == 0:
        print(counter)

    try:
        item_dict = item.get()
    except (bot.IsRedirectPage, bot.NoPage):
        # No content to read. Skip the item instead of falling through and
        # reusing the previous item's data (or hitting a NameError when this
        # happens on the very first item).
        continue

    labels = item_dict['labels']
    if 'en' in labels:
        label = labels['en']
    elif labels:
        # No English label: fall back to the first label available.
        label = next(iter(labels.values()))
    else:
        # No label at all: skip rather than record the previous item's label.
        continue

    # A missing P31 key counts as zero claims instead of raising KeyError.
    # NOTE(review): such items land in namesWithDisambig - confirm that is
    # the desired bucket.
    instance_of = item_dict['claims'].get('P31', [])
    if len(instance_of) == 1:
        legitNames.append(label)
    else:
        namesWithDisambig.append(label)

print("DONE GENERATING")
SELECT ?item WHERE {  ?item wdt:P31/wdt:P279* wd:Q101352.}
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
10000
10100
10200
10300
10400
10500
10600
10700
10800
10900
11000
11100
11200
11300
11400
11500
11600
11700
11800
11900
12000
12100
12200
12300
12400
12500
12600
12700
12800
12900
13000
13100
13200
13300
13400
13500
13600
13700
13800
13900
14000
14100
14200
14300
14400
14500
14600
14700
14800
14900
15000
15100
15200
15300
15400
15500
15600
15700
15800
15900
16000
16100
16200
16300
16400
16500
16600
16700
16800
16900
17000
17100
17200
17300
17400
17500
17600
17700
17800
17900
18000
18100
18200
18300
18400
18500
18600
18700
18800
18900
19000
19100
19200
19300
19400
19500
19600
19700
19800
19900
20000
20100
20200
20300
20400
20500
20600
20700
20800
20900
21000
21100
21200
21300
21400
21500
21600
21700
21800
21900
22000
22100
22200
22300
22400
22500
22600
22700
22800
22900
23000
23100
23200
23300
23400
23500
23600
23700
23800
23900
24000
24100
24200
24300
24400
24500
24600
24700
24800
24900
25000
25100
25200
25300
25400
25500
25600
25700
25800
25900
26000
26100
26200
26300
26400
26500
26600
26700
26800
26900
27000
27100
27200
27300
27400
ERROR: Traceback (most recent call last):
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 595, in urlopen
    chunked=chunked)
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 393, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 389, in _make_request
    httplib_response = conn.getresponse()
  File "/usr/lib/python3.4/http/client.py", line 1172, in getresponse
    response.begin()
  File "/usr/lib/python3.4/http/client.py", line 351, in begin
    version, status, reason = self._read_status()
  File "/usr/lib/python3.4/http/client.py", line 321, in _read_status
    raise BadStatusLine(line)
http.client.BadStatusLine: ''

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/paws/lib/python3.4/site-packages/requests/adapters.py", line 423, in send
    timeout=timeout
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 640, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/util/retry.py", line 261, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/packages/six.py", line 685, in reraise
    raise value.with_traceback(tb)
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 595, in urlopen
    chunked=chunked)
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 393, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 389, in _make_request
    httplib_response = conn.getresponse()
  File "/usr/lib/python3.4/http/client.py", line 1172, in getresponse
    response.begin()
  File "/usr/lib/python3.4/http/client.py", line 351, in begin
    version, status, reason = self._read_status()
  File "/usr/lib/python3.4/http/client.py", line 321, in _read_status
    raise BadStatusLine(line)
requests.packages.urllib3.exceptions.ProtocolError: ('Connection aborted.', BadStatusLine("''",))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/data/api.py", line 1954, in submit
    body=body, headers=headers)
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/tools/__init__.py", line 1447, in wrapper
    return obj(*__args, **__kw)
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/comms/http.py", line 309, in request
    r = fetch(baseuri, method, body, headers, **kwargs)
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/comms/http.py", line 464, in fetch
    error_handling_callback(request)
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/comms/http.py", line 382, in error_handling_callback
    raise request.data
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/comms/http.py", line 361, in _http_process
    verify=not ignore_validation)
  File "/srv/paws/lib/python3.4/site-packages/requests/sessions.py", line 475, in request
    resp = self.send(prep, **send_kwargs)
  File "/srv/paws/lib/python3.4/site-packages/requests/sessions.py", line 596, in send
    r = adapter.send(request, **kwargs)
  File "/srv/paws/lib/python3.4/site-packages/requests/adapters.py", line 473, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine("''",))

WARNING: Waiting 5 seconds before retrying.
27500
27600
27700
27800
27900
28000
28100
28200
28300
28400
28500
28600
28700
28800
28900
29000
29100
29200
29300
29400
29500
29600
29700
29800
29900
30000
30100
30200
30300
30400
30500
30600
30700
30800
30900
31000
31100
31200
31300
31400
31500
31600
31700
31800
31900
32000
32100
32200
32300
32400
32500
32600
32700
32800
32900
33000
33100
33200
33300
33400
33500
33600
33700
33800
33900
34000
34100
34200
34300
34400
34500
34600
34700
34800
34900
35000
35100
35200
35300
35400
35500
35600
35700
35800
35900
36000
36100
36200
36300
36400
36500
36600
36700
36800
36900
37000
37100
37200
37300
37400
37500
37600
37700
37800
37900
38000
38100
38200
38300
38400
38500
38600
38700
38800
38900
39000
39100
39200
39300
39400
39500
39600
39700
39800
39900
40000
40100
40200
40300
40400
40500
40600
40700
40800
40900
41000
41100
41200
41300
41400
41500
41600
41700
41800
41900
42000
42100
42200
42300
42400
42500
42600
42700
42800
42900
43000
43100
43200
43300
43400
43500
43600
43700
43800
43900
44000
44100
44200
44300
44400
44500
44600
44700
44800
44900
45000
45100
45200
45300
45400
45500
45600
45700
45800
45900
46000
46100
46200
46300
46400
46500
46600
46700
46800
46900
47000
47100
47200
47300
47400
47500
47600
47700
47800
47900
48000
48100
48200
48300
48400
48500
48600
48700
48800
48900
49000
49100
49200
49300
49400
49500
WARNING: Http response status 503
WARNING: Non-JSON response received from server wikidata:wikidata; the server may be down.
WARNING: Waiting 5 seconds before retrying.
49600
49700
49800
49900
50000
50100
50200
50300
50400
50500
50600
50700
50800
WARNING: Http response status 503
WARNING: Non-JSON response received from server wikidata:wikidata; the server may be down.
WARNING: Waiting 5 seconds before retrying.
50900
51000
51100
51200
51300
51400
WARNING: Http response status 503
WARNING: Non-JSON response received from server wikidata:wikidata; the server may be down.
WARNING: Waiting 5 seconds before retrying.
51500
51600
51700
51800
51900
52000
52100
52200
52300
52400
52500
52600
52700
52800
52900
53000
53100
53200
53300
53400
53500
53600
53700
53800
53900
54000
54100
54200
54300
54400
54500
54600
54700
54800
54900
55000
55100
55200
55300
55400
55500
55600
55700
55800
55900
56000
56100
56200
56300
56400
56500
56600
56700
56800
56900
57000
57100
57200
57300
57400
57500
57600
57700
57800
57900
58000
58100
58200
58300
58400
58500
58600
58700
58800
58900
59000
59100
59200
59300
59400
59500
59600
59700
59800
59900
60000
60100
60200
60300
60400
60500
60600
60700
60800
60900
61000
61100
61200
61300
61400
61500
61600
61700
61800
61900
62000
62100
62200
62300
62400
62500
62600
62700
62800
62900
63000
63100
63200
63300
63400
63500
63600
63700
63800
63900
64000
64100
64200
64300
64400
64500
64600
64700
64800
64900
65000
65100
65200
65300
65400
65500
65600
65700
65800
65900
66000
66100
66200
66300
66400
66500
66600
66700
66800
66900
67000
67100
67200
67300
67400
67500
67600
67700
67800
67900
68000
68100
68200
68300
68400
DONE GENERATING
# Candidate surnames, one per line. The context manager closes the file
# (the bare open() call never did), and the encoding is explicit because
# the names contain non-ASCII characters.
with open('sv_names.txt', encoding='utf-8') as names_file:
    names = [line.rstrip('\n') for line in names_file]

# Every label already collected from Wikidata, merged into one set so each
# membership test is O(1) instead of a scan over two long lists.
knownNames = set(legitNames).union(namesWithDisambig)

# Names absent from both lists, in file order (duplicates in the file are kept).
namesToCreate = [name for name in names if name not in knownNames]

# check if Wójcik is added to lists
# it's a subclass, so it should be in legitNames, not in namesToCreate
# Language codes for which each new name is written as an "L<code>" line.
languagesRoman = ["en", "de", "nb", "nn", "nl", "da", "fi", "sv", "fr", "es", "it", "lt", "lv", "pl"]
createStart = "CREATE"


# Emit one batch of commands per missing name: a CREATE line, a P31 ->
# Q101352 statement, one "L<code>" line per language above, and a "Den" line
# with the text "family name". Presumably QuickStatements syntax (labels and
# English description) - verify against the QuickStatements help page.
# UTF-8 is set explicitly because the names contain non-ASCII characters.
with open("sv_names_quickstatements.txt", 'w', encoding='utf-8') as file_handler:
    for item in namesToCreate:
        file_handler.write(createStart + "\n")
        file_handler.write('LAST\tP31\tQ101352\n')
        for language in languagesRoman:
            file_handler.write('LAST\tL{}\t"{}"\n'.format(language, item))
        file_handler.write('LAST\tDen\t"family name"\n')

# Preview up to 20 of the names. Slicing avoids the IndexError that indexing
# with range(0, 20) raised whenever fewer than 20 names were pending.
for sample in namesToCreate[:20]:
    print(sample)
Von
Åsberg
Häggström
Bylund
Öman
Norlin
Flink
Flodin
Turesson
Sundell
Wennberg
Lidström
Sundkvist
Matsson
Vesterberg
Pihl
Stenlund
Granath
Åman
Hjort
# Sanity check: evaluates to True when the label "Wójcik" was collected into
# legitNames. As a bare expression its value is only displayed in an
# interactive session; in a plain script the result is discarded.
"Wójcik" in legitNames
True