-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathusers_and_tracks_from_list.py
118 lines (99 loc) · 3.41 KB
/
users_and_tracks_from_list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import cPickle
import os
import os.path
import glob
import sqlite3
import all_data_one_entity as adoe
# Number of buffered tracks at which Writer.add_track flushes the track, user
# and done buffers to disk.
list_size_limit=1000
class Writer(object):
    """Buffer tracks, users and finished ids, writing them to disk in batches.

    Tracks and users are pickled as lists into numbered ``<n>.pck`` files in
    the ``tracks`` and ``users`` subdirectories of ``dir_path``. Finished ids
    are written one per line to ``done_file``.
    """

    def __init__(self, dir_path, done_file):
        """Create the output subdirectories if needed and start empty buffers.

        dir_path  -- existing directory that will hold ``tracks``/``users``.
        done_file -- writable file object receiving one finished id per line.
        """
        self.trackdir = os.path.join(dir_path, 'tracks')
        self.userdir = os.path.join(dir_path, 'users')
        self.done_file = done_file
        for subdir in (self.trackdir, self.userdir):
            if not os.path.exists(subdir):
                os.mkdir(subdir)
        self.trackstore = []
        self.userstore = []
        self.donestore = []

    def save_and_clear(self, store, dir):
        """Pickle ``store`` into the next numbered file in ``dir`` and empty it.

        The file number is one more than the count of entries already in
        ``dir``. ``store`` is emptied in place so callers keep the same list.
        """
        num = str(len(os.listdir(dir)) + 1)
        # Pickle data must be written in binary mode; the context manager
        # also closes the file if dump() raises.
        with open(os.path.join(dir, num + '.pck'), 'wb') as f:
            cPickle.dump(store, f)
        store[:] = []

    def save_done(self):
        """Write the buffered finished ids to the done file and flush it."""
        for t in self.donestore:
            self.done_file.write('{}\n'.format(t))
        self.done_file.flush()
        self.donestore[:] = []

    def _save_buffers(self):
        """Flush tracks, then users, then the finished-id list."""
        self.save_and_clear(self.trackstore, self.trackdir)
        self.save_and_clear(self.userstore, self.userdir)
        # Ids are recorded as done only after their data is on disk.
        self.save_done()

    def save_all(self):
        """Flush whatever is still buffered, regardless of batch size."""
        self._save_buffers()
        print('Remainder written.')

    def add_done(self, user):
        """Buffer an id as finished; it is written out on the next flush."""
        self.donestore.append(user)

    def add_track(self, track):
        """Buffer a track and flush all buffers once the batch limit is hit."""
        self.trackstore.append(track)
        if len(self.trackstore) >= list_size_limit:
            print('{} tracks ready to save.'.format(len(self.trackstore)))
            self._save_buffers()

    def add_user(self, user):
        """Buffer a user; it is written out on the next flush."""
        self.userstore.append(user)
def names(listfilepath):
    """Derive the working paths for a list file.

    Returns a 3-tuple ``(parent, stem, workdir)``: the directory holding the
    list file, the file name up to its first ``.``, and the path
    ``<parent>/<stem>_dir``.
    """
    parent, filename = os.path.dirname(listfilepath), os.path.basename(listfilepath)
    # Everything before the first dot, so 'ids.tar.gz' yields 'ids'.
    stem = filename.partition('.')[0]
    workdir = os.path.join(parent, stem + '_dir')
    return parent, stem, workdir
def get_users_and_tracks(listfilepath):
    """Fetch the record for every id in a list file, plus each record's user.

    ``listfilepath`` is a text file with one integer id per line; blank lines
    are ignored. Results are written through a ``Writer`` into
    ``<name>_dir`` next to the list file. Every id that has been attempted is
    appended to ``<name>_done.txt``, and ids already in that file are skipped,
    so an interrupted run can be resumed.
    """
    dir_path, name, subdir = names(listfilepath)
    if not os.path.exists(subdir):
        os.mkdir(subdir)
    donepath = os.path.join(dir_path, name + '_done.txt')

    with open(listfilepath, 'r') as list_file:
        to_do = {int(line) for line in list_file if line.strip()}
    if os.path.exists(donepath):
        with open(donepath, 'r') as finished:
            to_do.difference_update(
                int(line) for line in finished if line.strip())
    to_do = sorted(to_do)

    done_file = open(donepath, 'a')
    try:
        w = Writer(subdir, done_file)
        print('{} to work through...'.format(len(to_do)))
        for i, n in enumerate(to_do):
            if i < 10 or i % 1000 == 0:
                print('{} worked through'.format(i))
            # NOTE(review): this lookup used to be bound to `ud` via
            # adoe.user_data(n) while the code below read an undefined `td`,
            # raising NameError. The record is used as a track (its 'user_id'
            # is looked up and it goes to add_track), so it is fetched as one
            # here. Assumes adoe exposes track_data() -- confirm.
            td = adoe.track_data(n)
            # Marked done even when nothing came back, so the id is not
            # retried on the next run.
            w.add_done(n)
            if td:
                tu = adoe.user_data(td['user_id'])
                if tu:
                    w.add_user(tu)
                # NOTE(review): the track is stored even when its user could
                # not be fetched -- confirm this is the intended behaviour.
                w.add_track(td)
        w.save_all()
    finally:
        done_file.close()
def _insert_pickled_rows(conn, curs, pattern, table):
    """Insert every record from the pickle files matching ``pattern`` into ``table``."""
    for path in glob.iglob(pattern):
        print(path)
        # Pickle data is read in binary mode. These files are expected to be
        # ones this program wrote; unpickling untrusted files is unsafe.
        with open(path, 'rb') as f:
            rows = cPickle.load(f)
        for row in rows:
            adoe.insert_into_table(curs, table, row)
        # One commit per pickle file.
        conn.commit()


def insert_data(listfilepath, dbfilepath):
    """Load the pickled tracks and users for a list file into a SQLite database.

    listfilepath -- the list file whose ``<name>_dir`` holds the ``tracks``
                    and ``users`` pickle directories.
    dbfilepath   -- path of the SQLite database to insert into.

    Tracks go into the ``tracks`` table and users into the ``users`` table,
    via ``adoe.insert_into_table``.
    """
    conn = sqlite3.connect(dbfilepath)
    try:
        curs = conn.cursor()
        subdir = names(listfilepath)[2]
        print('Inserting tracks now')
        # Tracks are read from the 'tracks' directory, not 'users'.
        _insert_pickled_rows(
            conn, curs, os.path.join(subdir, 'tracks', '*.pck'), 'tracks')
        print('Inserting users now')
        _insert_pickled_rows(
            conn, curs, os.path.join(subdir, 'users', '*.pck'), 'users')
    finally:
        conn.close()