This repository has been archived by the owner on Jul 8, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
57 changed files
with
3,900 additions
and
634 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# -*- coding: utf-8 -*- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# -*- coding: utf-8 -*- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# -*- coding: utf-8 -*- | ||
import sqlite3 | ||
|
||
import logging | ||
|
||
from analytics.cases.main import ProcessDataCase | ||
|
||
logger = logging.getLogger('process') | ||
|
||
|
||
class CountOfGames(ProcessDataCase):
    """
    Report how the stored logs split between hanchan and tonpusen games.
    """

    def process(self):
        """
        Count the rows of the ``logs`` table and log the totals.

        Games with ``is_tonpusen = 0`` are counted as hanchan, the rest of the
        table is counted as tonpusen. Nothing is returned, the numbers are
        written to the 'process' logger.
        """
        total_games_sql = 'SELECT count(*) from logs'
        hanchan_games_sql = 'SELECT count(*) from logs where is_tonpusen = 0;'

        connection = sqlite3.connect(self.db_file)
        # "with connection" only wraps a transaction and never closes the
        # connection, so we close it ourselves once both counts are read
        try:
            cursor = connection.cursor()
            total_games = self._fetch_count(cursor, total_games_sql)
            hanchan_games = self._fetch_count(cursor, hanchan_games_sql)
        finally:
            connection.close()

        tonpusen_games = total_games - hanchan_games

        hanchan_percentage = self._percentage(hanchan_games, total_games)
        tonpusen_percentage = self._percentage(tonpusen_games, total_games)

        logger.info('Total games: {}'.format(total_games))
        logger.info('Hanchan games: {}, {:.2f}%'.format(hanchan_games, hanchan_percentage))
        logger.info('Tonpusen games: {}, {:.2f}%'.format(tonpusen_games, tonpusen_percentage))
        logger.info('')

    @staticmethod
    def _fetch_count(cursor, sql):
        """
        Run a single-value query and return its first column, or 0 if empty.
        """
        cursor.execute(sql)
        row = cursor.fetchone()
        return row[0] if row and row[0] else 0

    @staticmethod
    def _percentage(part, total):
        """
        Return ``part`` as a percentage of ``total``, or 0 for an empty total.
        """
        # guard against division by zero when the table has no games
        return (part / total) * 100 if total else 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# -*- coding: utf-8 -*- | ||
import re | ||
|
||
import logging | ||
|
||
from analytics.cases.main import ProcessDataCase | ||
|
||
logger = logging.getLogger('process') | ||
|
||
|
||
class HonitsuHands(ProcessDataCase):
    """
    Analytics case that looks for rounds won with a honitsu hand.
    """
    # value that is looked up in the parsed "yaku" attribute of AGARI tags
    HONITSU_ID = '34'

    def process(self):
        """
        Load every stored game and log how many honitsu hands were found.
        """
        self.load_all_records()

        honitsu_rounds = self.filter_rounds()
        logger.info('Found {} honitsu hands'.format(len(honitsu_rounds)))

    def filter_rounds(self):
        """
        Collect the rounds that contain an AGARI tag with the honitsu id.

        A round is added once for every matching AGARI tag it contains.
        """
        yaku_pattern = re.compile(r'yaku=\"(.+?)\"')

        # flatten the rounds of all games into one list first
        every_round = []
        for hanchan in self.hanchans:
            every_round.extend(hanchan.rounds)

        honitsu_rounds = []
        for game_round in every_round:
            for tag in game_round:
                # only winning tags that carry a yaku list are interesting
                if 'AGARI' not in tag or 'yaku=' not in tag:
                    continue

                raw_values = yaku_pattern.findall(tag)[0].split(',')
                # only the items on even positions are compared with the id
                # (presumably the odd ones are han values - TODO confirm)
                yaku_ids = raw_values[::2]

                if self.HONITSU_ID in yaku_ids:
                    honitsu_rounds.append(game_round)

        return honitsu_rounds
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# -*- coding: utf-8 -*- | ||
import bz2 | ||
import sqlite3 | ||
import logging | ||
|
||
import re | ||
|
||
logger = logging.getLogger('process') | ||
|
||
|
||
class Hanchan(object):
    """
    One game log taken from the database and split into rounds.

    ``rounds`` is a list of rounds, every round is a list of raw tag strings
    (everything up to and including a '>' character) in their original order.
    """
    log_id = None
    is_tonpusen = False
    content = None
    rounds = []

    # compiled once for the class instead of once per processed tag
    _SHUFFLE_RE = re.compile(r'shuffle="[^"]*"')
    _SCORES_RE = re.compile(r'sc="[^"]*" ')

    def __init__(self, log_id, is_tonpusen, compressed_content):
        """
        :param log_id: identifier of the log, stored as is
        :param is_tonpusen: flag of the game type, stored as is
        :param compressed_content: bz2 compressed content of the log
        """
        self.log_id = log_id
        # there was a trailing comma here that turned the flag into a tuple
        self.is_tonpusen = is_tonpusen

        content = bz2.decompress(compressed_content)
        # bz2 gives us bytes on python 3, but the parser below works with
        # text ('>' and tag names are str), so decode the content once here
        if not isinstance(content, str):
            content = content.decode('utf-8')
        self.content = content

        self.rounds = []

        self._parse_rounds()

    def _parse_rounds(self):
        """
        Split ``self.content`` into tags and group them into ``self.rounds``.

        A tag with 'INIT' starts a new round, a tag with 'owari' closes the
        last one. Tags that were met before the first 'INIT' are dropped.
        """
        # we had to parse it manually, to save resources
        content = self.content
        rounds = []
        game_round = []
        tag_start = 0

        while True:
            # jump straight to the end of the next tag
            tag_end = content.find('>', tag_start)
            if tag_end == -1:
                break

            tag = content[tag_start:tag_end + 1]
            tag_start = tag_end + 1

            # not useful tags
            if 'mjloggm' in tag or 'TAIKYOKU' in tag:
                continue

            # new round was started
            if 'INIT' in tag:
                rounds.append(game_round)
                game_round = []
                # to save some memory we drop the seed information,
                # it is also done before other checks to not match
                # anything inside of the seed value
                tag = self._SHUFFLE_RE.sub('', tag)

            # the end of the game, the tag itself still goes to this round
            if 'owari' in tag:
                rounds.append(game_round)

            if 'sc' in tag:
                # and we don't need points deltas
                tag = self._SCORES_RE.sub('', tag)

            # add processed tag to the round
            game_round.append(tag)

        # first element is player names, ranks and etc.
        # we shouldn't consider it as game round
        # and for now let's not save it
        self.rounds = rounds[1:]
|
||
|
||
class ProcessDataCase(object):
    """
    Base class for analytics cases that work with one sqlite database.

    Subclasses have to implement ``process`` and can call
    ``load_all_records`` to fill ``self.hanchans`` with parsed games.
    """
    db_file = ''
    hanchans = []

    def __init__(self, db_file):
        """
        :param db_file: path to the sqlite database with the ``logs`` table
        """
        self.db_file = db_file

        self.hanchans = []

    def process(self):
        """
        Run the case. Has to be overridden in a subclass.

        :raises NotImplementedError: always, in the base class
        """
        # NotImplemented is a constant and can't be called,
        # the exception class is what we need here
        raise NotImplementedError()

    def load_all_records(self, limit=60000):
        """
        Load processed logs without errors and parse them into ``Hanchan``
        objects that are appended to ``self.hanchans``.

        :param limit: maximum number of logs to read from the database
        """
        logger.info('Loading data...')

        connection = sqlite3.connect(self.db_file)
        # the connection context manager doesn't close the connection,
        # so we release it explicitly right after the data was read
        try:
            cursor = connection.cursor()

            cursor.execute("""SELECT log_id, is_tonpusen, log_content FROM logs
                           WHERE is_processed = 1 and was_error = 0 LIMIT ?;""", [limit])
            data = cursor.fetchall()
        finally:
            connection.close()

        logger.info('Found {} records'.format(len(data)))

        logger.info('Unzipping and processing games data...')
        for log_id, is_tonpusen, log_content in data:
            self.hanchans.append(Hanchan(log_id, is_tonpusen == 1, log_content))

        total_rounds = sum(len(hanchan.rounds) for hanchan in self.hanchans)

        logger.info('Found {} rounds'.format(total_rounds))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# -*- coding: utf-8 -*- | ||
import os | ||
import sqlite3 | ||
import sys | ||
|
||
db_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'db') | ||
db_file = '' | ||
|
||
|
||
def main():
    """
    Print total, processed and failed log counts for the selected database.
    """
    parse_command_line_arguments()

    # message template and the query that produces its number
    reports = (
        ('Total: {}', 'SELECT COUNT(*) from logs;'),
        ('Processed: {}', 'SELECT COUNT(*) from logs where is_processed = 1;'),
        ('With errors: {}', 'SELECT COUNT(*) from logs where was_error = 1;'),
    )

    connection = sqlite3.connect(db_file)

    with connection:
        cursor = connection.cursor()

        # run all queries first, nothing is printed if one of them fails
        counts = []
        for _, sql in reports:
            cursor.execute(sql)
            counts.append(cursor.fetchone()[0])

    for (template, _), count in zip(reports, counts):
        print(template.format(count))
|
||
|
||
def parse_command_line_arguments():
    """
    Point the global ``db_file`` to '<year>.db' inside of ``db_folder``.

    The year is the first command line argument, '2017' is used without it.
    """
    global db_file

    year = sys.argv[1] if len(sys.argv) > 1 else '2017'
    db_file = os.path.join(db_folder, '{}.db'.format(year))
|
||
if __name__ == '__main__': | ||
main() |
Oops, something went wrong.