make editorid null not '' in parquet.

This commit is contained in:
Nathan TeBlunthuis 2025-05-29 18:24:33 -07:00
parent 606a399450
commit ffbd180001

8
wikiq
View File

@@ -238,11 +238,11 @@ class RevDataBase:
revid: int revid: int
date_time: datetime date_time: datetime
articleid: int articleid: int
editorid: int
title: str title: str
namespace: int namespace: int
deleted: bool deleted: bool
edit_summary: str edit_summary: str
editorid: int = None
text_chars: int = None text_chars: int = None
revert: bool = None revert: bool = None
reverteds: list[int] = None reverteds: list[int] = None
@@ -534,11 +534,11 @@ class WikiqParser:
rev_data = self.revdata_type(revid=rev.id, rev_data = self.revdata_type(revid=rev.id,
date_time=datetime.fromtimestamp(rev.timestamp.unix(), tz=timezone.utc), date_time=datetime.fromtimestamp(rev.timestamp.unix(), tz=timezone.utc),
articleid=page.id, articleid=page.id,
editorid="" if rev.deleted.user == True or rev.user.id is None else rev.user.id, editorid=None if rev.deleted.user == True or rev.user.id is None else rev.user.id,
title=page.title, title=page.title,
deleted=rev.deleted.text, deleted=rev.deleted.text,
namespace=namespace, namespace=namespace,
edit_summary="" if rev.comment is False else rev.comment edit_summary=rev.comment
) )
rev_data = self.matchmake_revision(rev, rev_data) rev_data = self.matchmake_revision(rev, rev_data)
@@ -672,7 +672,7 @@ class WikiqParser:
cols = [] cols = []
first = rg[0] first = rg[0]
for col in first: for col in first:
cols.append([col]) cols.append([col])
for row in rg[1:]: for row in rg[1:]:
for j in range(len(cols)): for j in range(len(cols)):