import xml.etree.ElementTree as ET
import datetime
import operator
import pickle
import base64
import os
import re
# Load the exported items document once at start-up; the driver loop at the
# bottom of the file walks `root` to find the captured responses.
tree = ET.ElementTree(file='/Volumes/Data/Items.txt')
root = tree.getroot()
from HTMLParser import HTMLParser
# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
    """Interactive vote recorder driven by a forum thread's HTML pages.

    Pages are pushed in with ``feed()``.  While parsing, the poster name,
    timestamp and body text of each post are collected into ``self.info``.
    When a post body closes, the post is shown on the terminal together with
    regex-based vote guesses and the operator types a command: ``vote``,
    ``accuse``, ``defend``, ``skip``, ``newday``, ``tabulate``, ``correct``,
    ``delete`` or ``exit`` (any non-empty prefix of a command is accepted),
    or the number of one of the guesses.

    Recorded entries are grouped per day in ``self.allVotes`` (a list of
    days, each a list of info dicts) and pickled to ``self.fileName`` after
    every change.

    The class targets Python 2 (it prompts with ``raw_input``); the base
    initializer is called explicitly rather than through ``super()``.
    """

    # Posts by this player are never offered for recording.
    IGNORED_PLAYER = "Pengu"
    # An <a href> starting with this prefix is stored as the post's link.
    TOPIC_LINK_PREFIX = "index.php?topic=3255"
    # An <a> whose href and title start with these marks the poster's name.
    # NOTE(review): the URL literal was split across two lines in the
    # reviewed source; it is assumed to be this single-line string.
    PROFILE_URL_PREFIX = "http://wintreath.com/index.php?part=profile&id="
    PROFILE_TITLE_PREFIX = "View the profile of "
    # Format of the timestamp text looked for after a "smalltext" <div>.
    TIME_FORMAT = "%B %d, %Y, %I:%M:%S %p"
    # Patterns whose first group is offered as a numbered vote guess.
    # Compiled once here instead of on every prompt.
    # NOTE(review): the last two patterns were truncated in the reviewed
    # source; their capture group is assumed to match the other patterns.
    GUESS_VOTE_REGEXES = (
        re.compile(r"[Vv]ote:\s*([^\s.,]*)"),
        re.compile(r"[Ii]\s*[Vv]ote\s*([^\s.,]*)"),
        re.compile(r"[Vv]ote\s*([^\s.,]*)"),
        re.compile(r"[Vv]ote\s*for\s*([^\s.,]*)"),
        re.compile(r"[Vv]oting\s*for\s*([^\s.,]*)"),
    )

    def __init__(self, fileName):
        """Set up parser state and load previously saved votes.

        fileName -- path of the pickle file used by load() and save().
        """
        HTMLParser.__init__(self)
        # Flags saying which part of a post the incoming text belongs to.
        self.dataIsPost = False
        self.dataIsPlayer = False
        self.dataIsTime = False
        self.dataIsQuoteBody = False
        self.dataIsQuoteHeader = False
        self.collectingInfo = False
        # Number of <div>s currently open inside the post <div>.
        self.divInPostDepth = 0
        self.fileName = fileName
        self.allVotes = [[]]
        self.info = self._blankInfo()
        self.load()

    @staticmethod
    def _blankInfo():
        """Return a fresh, empty record for the next post."""
        return {"player": "", "time": "", "post": "", "vote": ""}

    @staticmethod
    def _inRange(index, sequence):
        """Return True if sequence[index] exists (negative indexes allowed)."""
        return -len(sequence) <= index < len(sequence)

    @staticmethod
    def _parseInts(words):
        """Return the words converted to ints, or None if any is not a number."""
        try:
            return [int(word) for word in words]
        except ValueError:
            return None

    @staticmethod
    def _parseNames(words):
        """Return the non-empty, stripped names of a comma-separated list."""
        names = (name.strip() for name in " ".join(words).split(","))
        return [name for name in names if name]

    def save(self):
        """Pickle self.allVotes to self.fileName.

        Best effort: prints "Failed to save" instead of raising.
        """
        try:
            # Serialize before opening so a pickling error cannot leave a
            # truncated file behind.
            payload = pickle.dumps(self.allVotes)
            with open(self.fileName, "wb") as fh:
                fh.write(payload)
        except Exception:
            print("Failed to save")

    def load(self):
        """Replace self.allVotes with the object pickled in self.fileName.

        Best effort: on failure (for example when the file does not exist
        yet) self.allVotes is left untouched and "Failed to load" is printed.
        """
        # SECURITY: pickle.loads can execute arbitrary code. Only open vote
        # files that were written by this program.
        try:
            with open(self.fileName, "rb") as fh:
                self.allVotes = pickle.loads(fh.read())
        except Exception:
            # Unpickling can raise many unrelated exception types, so the
            # handler is deliberately broad (but no longer swallows
            # KeyboardInterrupt/SystemExit like a bare except did).
            print("Failed to load")

    def latestPostTime(self):
        """Print and return the timestamp of the most recently recorded entry.

        Days are searched from newest to oldest, so an empty day started
        with "newday" does not hide earlier entries.  Entries without a
        parsed timestamp are ignored.  Returns datetime(1, 1, 1) when no
        timestamp has been recorded.
        """
        recordedTimes = (
            vote.get("time")
            for dayVotes in reversed(self.allVotes)
            for vote in reversed(dayVotes)
        )
        postTime = next(
            (when for when in recordedTimes
             if isinstance(when, datetime.datetime)),
            datetime.datetime(1, 1, 1),
        )
        print(postTime)
        return postTime

    def commandCompare(self, command, compare):
        """Return True if command is a non-empty prefix of compare.

        An empty command never matches; otherwise just pressing Enter would
        be read as the first command tested ("newday").
        """
        return bool(command) and compare.startswith(command)

    def outputVotes(self):
        """Print, for every day, how many votes each player received.

        Entries with an empty "vote" (accuse/defend records) are not counted.
        """
        for dayNumber, dayVotes in enumerate(self.allVotes, 1):
            print("Day {}".format(dayNumber))
            tally = {}
            for vote in dayVotes:
                target = vote.get("vote")
                if not target:
                    continue
                tally[target] = tally.get(target, 0) + 1
            for player, nVotes in tally.items():
                print("\t{}: {}".format(player, nVotes))

    def handle_starttag(self, tag, attrs):
        """Update the state flags for an opening <div>, <blockquote> or <a>."""
        if tag == "div":
            # The post <div> itself is not counted: dataIsPost is still
            # False when it opens, so only <div>s nested in it add depth.
            if self.dataIsPost:
                self.divInPostDepth += 1
            for attr in attrs:
                if attr == ("class", "post"):
                    self.dataIsPost = True
                elif attr == ("class", "topslice_quote"):
                    self.dataIsQuoteHeader = True
                elif attr == ("class", "smalltext"):
                    self.dataIsTime = True
        elif tag == "blockquote":
            self.dataIsQuoteBody = True
        elif tag == "a":
            rightUrl = False
            rightTitle = False
            for name, value in attrs:
                if value is None:
                    # Attribute written without a value, e.g. <a href>.
                    continue
                if name == "href":
                    if value.startswith(self.TOPIC_LINK_PREFIX):
                        self.info["link"] = value
                    elif value.startswith(self.PROFILE_URL_PREFIX):
                        rightUrl = True
                elif name == "title" and value.startswith(self.PROFILE_TITLE_PREFIX):
                    rightTitle = True
            self.dataIsPlayer = rightUrl and rightTitle
            if self.dataIsPlayer:
                self.collectingInfo = True

    def handle_endtag(self, tag):
        """Update the state flags for a closing tag and finish completed posts.

        A post is finished when a <div> closes inside the post and brings
        the nested depth back to zero.  Because the post <div> itself is not
        counted, this happens at the close of a <div> nested directly in it.
        """
        if tag == "div":
            self.dataIsQuoteHeader = False
            if self.dataIsPost:
                self.divInPostDepth -= 1
                if self.divInPostDepth == 0:
                    self.dataIsPost = False
                    self._finishPost()
        elif tag == "a":
            self.dataIsPlayer = False
        elif tag == "blockquote":
            self.dataIsQuoteBody = False

    def _finishPost(self):
        """Offer the collected post for review if eligible, then reset info.

        A post is offered unless it was written by IGNORED_PLAYER or its
        timestamp is not newer than the latest recorded one.  Posts without
        a parsed timestamp are always offered.
        """
        self.info["post"] = self.info["post"].strip()
        postTime = self.info["time"]
        if self.info["player"] != self.IGNORED_PLAYER and (
                not postTime or postTime > self.latestPostTime()):
            self._reviewPost()
        self.info = self._blankInfo()

    def _guessVotes(self):
        """Return every regex guess found in the post, line by line."""
        guesses = []
        for line in self.info["post"].split("\n"):
            for regex in self.GUESS_VOTE_REGEXES:
                match = regex.search(line)
                if match:
                    guesses.append(match.group(1))
        return guesses

    def _recordInfo(self):
        """Append the current post's info to the latest day and save."""
        if not self.allVotes:
            # A loaded file may contain no days at all; start the first one.
            self.allVotes.append([])
        self.allVotes[-1].append(self.info)
        self.save()

    def _reviewPost(self):
        """Prompt until the operator records or skips the current post."""
        guessVoteMatches = self._guessVotes()
        while True:
            os.system('clear')
            for number, guess in enumerate(guessVoteMatches, 1):
                print("{}. {}".format(number, guess))
            command = raw_input("\n{}\n{}\n{}\n".format(
                self.info["player"], self.info["time"], self.info["post"]))
            inputs = command.split(" ")
            verb = inputs[0]
            if self.commandCompare(verb, "newday"):
                self.allVotes.append([])
                self.save()
            elif self.commandCompare(verb, "tabulate"):
                self.outputVotes()
                print("-----")
                for day in self.allVotes:
                    for vote in day:
                        # "link" is only present when a topic link was seen.
                        print("{}\n{}\n{}\n{}\n".format(
                            vote.get("link", ""), vote["player"],
                            vote["time"], vote["vote"]))
                # Pause so the listing can be read before the screen clears.
                raw_input()
            elif self.commandCompare(verb, "correct") and len(inputs) == 4:
                indexes = self._parseInts(inputs[1:3])
                if indexes is None:
                    raw_input("Unknown command!")
                    continue
                day, idx = indexes
                fld = inputs[3]
                if (self._inRange(day, self.allVotes)
                        and self._inRange(idx, self.allVotes[day])
                        and fld in self.allVotes[day][idx]):
                    print("Current Value: {}".format(self.allVotes[day][idx][fld]))
                    newValue = raw_input("New Value: ")
                    self.allVotes[day][idx][fld] = newValue
                    self.save()
            elif self.commandCompare(verb, "delete") and len(inputs) == 3:
                indexes = self._parseInts(inputs[1:3])
                if indexes is None:
                    raw_input("Unknown command!")
                    continue
                day, idx = indexes
                if (self._inRange(day, self.allVotes)
                        and self._inRange(idx, self.allVotes[day])):
                    del self.allVotes[day][idx]
                    self.save()
            elif self.commandCompare(verb, "exit"):
                raise SystemExit
            elif self.commandCompare(verb, "vote"):
                player = " ".join(inputs[1:]).strip()
                if player:
                    self.info["vote"] = player
                    self._recordInfo()
                    break
            elif self.commandCompare(verb, "accuse"):
                players = self._parseNames(inputs[1:])
                if players:
                    self.info["accused"] = players
                    self._recordInfo()
                    break
            elif self.commandCompare(verb, "defend"):
                players = self._parseNames(inputs[1:])
                if players:
                    self.info["defended"] = players
                    self._recordInfo()
                    break
            elif self.commandCompare(verb, "skip"):
                break
            else:
                # Anything else must be the number of a displayed guess.
                chosen = self._parseInts([verb])
                if chosen is None:
                    raw_input("Unknown command!")
                    continue
                whichVote = chosen[0] - 1
                if 0 <= whichVote < len(guessVoteMatches):
                    self.info["vote"] = guessVoteMatches[whichVote]
                    self._recordInfo()
                    break

    def handle_data(self, data):
        """Route text to the post body, the player name or the timestamp."""
        if self.dataIsPost:
            # Quoted text is not part of what the poster wrote.
            if not self.dataIsQuoteBody and not self.dataIsQuoteHeader:
                self.info["post"] += data
        elif self.dataIsPlayer:
            self.info["player"] += data
        elif self.dataIsTime:
            try:
                self.info["time"] = datetime.datetime.strptime(
                    data.strip(), self.TIME_FORMAT)
            except ValueError:
                # Not a timestamp; keep looking at the following text.
                pass
            else:
                self.dataIsTime = False
# Instantiate the parser and feed it some HTML.
# Ask which pickle file holds the recorded votes; the parser loads it.
parser = MyHTMLParser(raw_input("Load File: "))
# Every <response> found below an <item> holds one base64-encoded page;
# decode each one and run it through the parser.
for response in root.findall(".//item//response"):
    if not response.text:
        # Element.text is None for an empty <response/>, which would make
        # base64.b64decode raise; there is nothing to parse in that case.
        continue
    page = base64.b64decode(response.text)
    parser.feed(page)