1
0

fixing spark for querying data

This commit is contained in:
Matthew Gaughan 2025-01-08 11:54:02 -06:00
parent 8c934d93c5
commit 092306d777
5 changed files with 958 additions and 61 deletions

View File

@ -3,7 +3,7 @@ import bz2
import shutil import shutil
import os import os
FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files" FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/yearly_activity_files/"
def decompress(filepath): def decompress(filepath):
decompressed_filepath = filepath[:-4] decompressed_filepath = filepath[:-4]

View File

@ -192,9 +192,10 @@ ALL_PROJECTS = [
DUMP = "2024-11" DUMP = "2024-11"
test_url = f"https://dumps.wikimedia.org/other/mediawiki_history/{DUMP}/aawiki/{DUMP}.aawiki.all-time.tsv.bz2" test_url = f"https://dumps.wikimedia.org/other/mediawiki_history/{DUMP}/aawiki/{DUMP}.aawiki.all-time.tsv.bz2"
DUMP_LOC_PREFIX = f"https://dumps.wikimedia.org/other/mediawiki_history/{DUMP}/" DUMP_LOC_PREFIX = f"https://dumps.wikimedia.org/other/mediawiki_history/{DUMP}/"
FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/compressed_yearly_activity_files/" FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/compressed_monthly_activity_files/"
YEARS = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024'] YEARS = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024']
MONTHS = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
def download_file(url): def download_file(url):
print(f"Try to get {url}") print(f"Try to get {url}")
@ -209,14 +210,18 @@ def download_file(url):
print(f"Downloaded file {filename}") print(f"Downloaded file {filename}")
return filename return filename
def decompress(filepath):
    """Decompress a ``.bz2`` archive alongside itself, then delete the archive.

    The output is written to *filepath* with its ``.bz2`` suffix removed.
    Progress is reported on stdout.

    Args:
        filepath: Path string of the bzip2 archive; must end in ``.bz2``.

    Raises:
        ValueError: If *filepath* does not end in ``.bz2``. Checked before
            anything is opened, so a wrongly named output file is never
            created or truncated.
    """
    suffix = ".bz2"
    if not filepath.endswith(suffix):
        raise ValueError(f"Expected a path ending in {suffix}, got {filepath!r}")
    decompressed_filepath = filepath[:-len(suffix)]
    with bz2.BZ2File(filepath) as fr, open(decompressed_filepath, "wb") as fw:
        shutil.copyfileobj(fr, fw)
    print(f"Decompressed {decompressed_filepath}")
    # Only reached when the copy above completed without raising.
    os.remove(filepath)
    print(f"Deleted {filepath}")
def batch_for_monthly():
    """Download the per-month dump files for every project in MONTHLY_PROJECTS.

    One URL is built per (project, year, month) combination from
    ``DUMP_LOC_PREFIX``, ``DUMP``, ``YEARS`` and ``MONTHS`` and passed to
    ``download_file``. Downloads are best-effort: a failure is reported on
    stdout together with its cause and the batch continues with the next URL.
    """
    # NOTE(review): YEARS x MONTHS also yields months later than DUMP
    # (e.g. 2024-12 for the 2024-11 dump); presumably those files do not
    # exist upstream and simply take the error path below -- confirm.
    urls = []
    for entry in MONTHLY_PROJECTS:
        for year in YEARS:
            for month in MONTHS:
                urls.append(f"{DUMP_LOC_PREFIX}{entry}/{DUMP}.{entry}.{year}-{month}.tsv.bz2")
    for url in urls:
        try:
            download_file(url)
        except Exception as exc:
            # Keep the batch going, but surface why this URL failed so the
            # failure can be diagnosed instead of silently discarded.
            print(f"error! {url}: {exc!r}")
def batch_for_yearly(): def batch_for_yearly():
urls = [] urls = []
@ -250,20 +255,5 @@ def batch_parallel_for_single():
print('generated an exception: %s' % (exc)) print('generated an exception: %s' % (exc))
''' '''
def decompress_directory(directory_name):
    """Decompress every ``.bz2`` file found anywhere under *directory_name*.

    The tree is walked recursively with ``os.walk``; the full path of each
    matching file is printed and then passed to ``decompress``.
    """
    for current_dir, _subdirs, filenames in os.walk(directory_name):
        for filename in filenames:
            # Anything that is not a bzip2 archive is left untouched.
            if not filename.endswith('.bz2'):
                continue
            archive_path = os.path.join(current_dir, filename)
            print(archive_path)
            decompress(archive_path)
if __name__ == "__main__": if __name__ == "__main__":
batch_for_yearly() batch_for_monthly()
#decompress_directory(FILE_LOC_PREFIX)
#file = download_file(test_url)
#decompress("/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/2024-11.zuwiktionary.all-time.tsv.bz2")

View File

@ -0,0 +1,75 @@
Field class,Field name,Data type,Comment
Event_global,wiki_db,string,"enwiki, dewiki, eswiktionary, etc."
Event_global,event_entity,string,"revision, user or page"
Event_global,event_type,string,"create, move, delete, etc. Detailed explanation in the docs under #Event_types"
Event_global,event_timestamp,string,When this event occurred
Event_global,event_comment,string,"Comment related to this event, sourced from log_comment, rev_comment, etc."
Event user,event_user_id,bigint,ID of the user that caused the event. Null if the user is anonymous or if from a revision where the user has been revision deleted.
Event user,event_user_text_historical,string,Historical username (IP address for anonymous user) of the user that caused the event. Null for revisions where the user has been revision deleted.
Event user,event_user_text,string,Current username of the user that caused the event. Null for anonymous users (the IP is stored in event_user_text_historical). Null for revisions where the user has been revision deleted.
Event user,event_user_blocks_historical,array<string>,Historical blocks of the user that caused the event
Event user,event_user_blocks,array<string>,Current blocks of the user that caused the event
Event user,event_user_groups_historical,array<string>,Historical groups of the user that caused the event
Event user,event_user_groups,array<string>,Current groups of the user that caused the event
Event user,event_user_is_bot_by_historical,array<string>,"Historical bot information of the user that caused the event, can contain values name or group"
Event user,event_user_is_bot_by,array<string>,"Bot information of the user that caused the event, can contain values name or group"
Event user,event_user_is_created_by_self,boolean,Whether the event_user created their own account
Event user,event_user_is_created_by_system,boolean,Whether the event_user account was created by mediawiki (eg. centralauth)
Event user,event_user_is_created_by_peer,boolean,Whether the event_user account was created by another user
Event user,event_user_is_anonymous,boolean,"Whether the event_user is not registered, using the old way that surfaced the IP publicly. True for revisions where the user has been revision deleted, even if the user was actually registered."
Event user,event_user_is_temporary,boolean,"Whether the event_user is not registered, using the new temporary account way. True for revisions where the user has been revision deleted, even if the user was actually registered."
Event user,event_user_is_permanent,boolean,Whether the event_user is registered.
Event user,event_user_registration_timestamp,string,Registration timestamp of the user that caused the event (from user table)
Event user,event_user_creation_timestamp,string,Creation timestamp of the user that caused the event (from logging table)
Event user,event_user_first_edit_timestamp,string,Timestamp of the first edit of the user that caused the event
Event user,event_user_revision_count,bigint,"Number of revisions made by the event_user up to the historical time in this wiki_db (only available in revision-create events so far). For revision-create events, this includes the event itself."
Event user,event_user_seconds_since_previous_revision,bigint,In revision events: seconds elapsed since the previous revision made by the current event_user_id (only available in revision-create events so far)
page,page_id,bigint,In revision/page events: id of the page
page,page_title_historical,string,In revision/page events: historical title of the page
page,page_title,string,In revision/page events: current title of the page
page,page_namespace_historical,int,In revision/page events: historical namespace of the page.
page,page_namespace_is_content_historical,boolean,In revision/page events: historical namespace of the page is categorized as content
page,page_namespace,int,In revision/page events: current namespace of the page
page,page_namespace_is_content,boolean,In revision/page events: current namespace of the page is categorized as content
page,page_is_redirect,boolean,In revision/page events: whether the page is currently a redirect
page,page_is_deleted,boolean,In revision/page events: Whether the page is rebuilt from a delete event
page,page_creation_timestamp,string,In revision/page events: creation timestamp of the page
page,page_first_edit_timestamp,string,In revision/page events: timestamp of the page's first revision. Can be before the page_creation in some restore/merge cases (see revision_is_from_before_page_creation).
page,page_revision_count,bigint,In revision/page events: Cumulative revision count per page for the current page_id (only available in revision-create events so far)
page,page_seconds_since_previous_revision,bigint,In revision/page events: seconds elapsed since the previous revision made on the current page_id (only available in revision-create events so far)
user,user_id,bigint,In user events: id of the user
user,user_text_historical,string,In user events: historical username or IP address of the user
user,user_text,string,In user events: current username or IP address of the user
user,user_blocks_historical,array<string>,In user events: historical user blocks
user,user_blocks,array<string>,In user events: current user blocks
user,user_groups_historical,array<string>,In user events: historical user groups
user,user_groups,array<string>,In user events: current user groups
user,user_is_bot_by_historical,array<string>,"In user events: Historical bot information of the user, can contain values name or group"
user,user_is_bot_by,array<string>,"In user events: Bot information of the user, can contain values name or group"
user,user_is_created_by_self,boolean,In user events: whether the user created their own account
user,user_is_created_by_system,boolean,In user events: whether the user account was created by mediawiki
user,user_is_created_by_peer,boolean,In user events: whether the user account was created by another user
user,user_is_anonymous,boolean,"In user events: whether the user is not registered, using the old way that surfaced the IP publicly"
user,user_is_temporary,boolean,"In user events: whether the user is not registered, using the new temporary account way"
user,user_is_permanent,boolean,In user events: whether the user is registered
user,user_registration_timestamp,string,In user events: registration timestamp of the user.
user,user_creation_timestamp,string,In user events: Creation timestamp of the user (from logging table)
user,user_first_edit_timestamp,string,In user events: Timestamp of the first edit of the user
revision,revision_id,bigint,In revision events: id of the revision
revision,revision_parent_id,bigint,In revision events: id of the parent revision
revision,revision_minor_edit,boolean,In revision events: whether it is a minor edit or not
revision,revision_deleted_parts,array<string>,"In revision events: Deleted parts of the revision, can contain values text, comment and user"
revision,revision_deleted_parts_are_suppressed,boolean,In revision events: Whether the deleted parts are deleted to admin as well (visible only by stewards)
revision,revision_text_bytes,bigint,In revision events: number of bytes of revision
revision,revision_text_bytes_diff,bigint,In revision events: change in bytes relative to parent revision (can be negative).
revision,revision_text_sha1,string,In revision events: sha1 hash of the revision
revision,revision_content_model,string,In revision events: content model of revision
revision,revision_content_format,string,In revision events: content format of revision
revision,revision_is_deleted_by_page_deletion,boolean,In revision events: whether this revision has been deleted (moved to archive table)
revision,revision_deleted_by_page_deletion_timestamp,string,In revision events: the timestamp when the revision was deleted
revision,revision_is_identity_reverted,boolean,In revision events: whether this revision was reverted by another future revision
revision,revision_first_identity_reverting_revision_id,bigint,In revision events: id of the revision that reverted this revision
revision,revision_seconds_to_identity_revert,bigint,In revision events: seconds elapsed between revision posting and its revert (if there was one)
revision,revision_is_identity_revert,boolean,In revision events: whether this revision reverts other revisions
revision,revision_is_from_before_page_creation,boolean,In revision events: True if the revision timestamp is before the page creation (can happen with restore events)
revision,revision_tags,array<string>,In revision events: Tags associated to the revision
1 Field class Field name Data type Comment
2 Event_global wiki_db string enwiki, dewiki, eswiktionary, etc.
3 Event_global event_entity string revision, user or page
4 Event_global event_type string create, move, delete, etc. Detailed explanation in the docs under #Event_types
5 Event_global event_timestamp string When this event occurred
6 Event_global event_comment string Comment related to this event, sourced from log_comment, rev_comment, etc.
7 Event user event_user_id bigint ID of the user that caused the event. Null if the user is anonymous or if from a revision where the user has been revision deleted.
8 Event user event_user_text_historical string Historical username (IP address for anonymous user) of the user that caused the event. Null for revisions where the user has been revision deleted.
9 Event user event_user_text string Current username of the user that caused the event. Null for anonymous users (the IP is stored in event_user_text_historical). Null for revisions where the user has been revision deleted.
10 Event user event_user_blocks_historical array<string> Historical blocks of the user that caused the event
11 Event user event_user_blocks array<string> Current blocks of the user that caused the event
12 Event user event_user_groups_historical array<string> Historical groups of the user that caused the event
13 Event user event_user_groups array<string> Current groups of the user that caused the event
14 Event user event_user_is_bot_by_historical array<string> Historical bot information of the user that caused the event, can contain values name or group
15 Event user event_user_is_bot_by array<string> Bot information of the user that caused the event, can contain values name or group
16 Event user event_user_is_created_by_self boolean Whether the event_user created their own account
17 Event user event_user_is_created_by_system boolean Whether the event_user account was created by mediawiki (eg. centralauth)
18 Event user event_user_is_created_by_peer boolean Whether the event_user account was created by another user
19 Event user event_user_is_anonymous boolean Whether the event_user is not registered, using the old way that surfaced the IP publicly. True for revisions where the user has been revision deleted, even if the user was actually registered.
20 Event user event_user_is_temporary boolean Whether the event_user is not registered, using the new temporary account way. True for revisions where the user has been revision deleted, even if the user was actually registered.
21 Event user event_user_is_permanent boolean Whether the event_user is registered.
22 Event user event_user_registration_timestamp string Registration timestamp of the user that caused the event (from user table)
23 Event user event_user_creation_timestamp string Creation timestamp of the user that caused the event (from logging table)
24 Event user event_user_first_edit_timestamp string Timestamp of the first edit of the user that caused the event
25 Event user event_user_revision_count bigint Number of revisions made by the event_user up to the historical time in this wiki_db (only available in revision-create events so far). For revision-create events, this includes the event itself.
26 Event user event_user_seconds_since_previous_revision bigint In revision events: seconds elapsed since the previous revision made by the current event_user_id (only available in revision-create events so far)
27 page page_id bigint In revision/page events: id of the page
28 page page_title_historical string In revision/page events: historical title of the page
29 page page_title string In revision/page events: current title of the page
30 page page_namespace_historical int In revision/page events: historical namespace of the page.
31 page page_namespace_is_content_historical boolean In revision/page events: historical namespace of the page is categorized as content
32 page page_namespace int In revision/page events: current namespace of the page
33 page page_namespace_is_content boolean In revision/page events: current namespace of the page is categorized as content
34 page page_is_redirect boolean In revision/page events: whether the page is currently a redirect
35 page page_is_deleted boolean In revision/page events: Whether the page is rebuilt from a delete event
36 page page_creation_timestamp string In revision/page events: creation timestamp of the page
37 page page_first_edit_timestamp string In revision/page events: timestamp of the page's first revision. Can be before the page_creation in some restore/merge cases (see revision_is_from_before_page_creation).
38 page page_revision_count bigint In revision/page events: Cumulative revision count per page for the current page_id (only available in revision-create events so far)
39 page page_seconds_since_previous_revision bigint In revision/page events: seconds elapsed since the previous revision made on the current page_id (only available in revision-create events so far)
40 user user_id bigint In user events: id of the user
41 user user_text_historical string In user events: historical username or IP address of the user
42 user user_text string In user events: current username or IP address of the user
43 user user_blocks_historical array<string> In user events: historical user blocks
44 user user_blocks array<string> In user events: current user blocks
45 user user_groups_historical array<string> In user events: historical user groups
46 user user_groups array<string> In user events: current user groups
47 user user_is_bot_by_historical array<string> In user events: Historical bot information of the user, can contain values name or group
48 user user_is_bot_by array<string> In user events: Bot information of the user, can contain values name or group
49 user user_is_created_by_self boolean In user events: whether the user created their own account
50 user user_is_created_by_system boolean In user events: whether the user account was created by mediawiki
51 user user_is_created_by_peer boolean In user events: whether the user account was created by another user
52 user user_is_anonymous boolean In user events: whether the user is not registered, using the old way that surfaced the IP publicly
53 user user_is_temporary boolean In user events: whether the user is not registered, using the new temporary account way
54 user user_is_permanent boolean In user events: whether the user is registered
55 user user_registration_timestamp string In user events: registration timestamp of the user.
56 user user_creation_timestamp string In user events: Creation timestamp of the user (from logging table)
57 user user_first_edit_timestamp string In user events: Timestamp of the first edit of the user
58 revision revision_id bigint In revision events: id of the revision
59 revision revision_parent_id bigint In revision events: id of the parent revision
60 revision revision_minor_edit boolean In revision events: whether it is a minor edit or not
61 revision revision_deleted_parts array<string> In revision events: Deleted parts of the revision, can contain values text, comment and user
62 revision revision_deleted_parts_are_suppressed boolean In revision events: Whether the deleted parts are deleted to admin as well (visible only by stewards)
63 revision revision_text_bytes bigint In revision events: number of bytes of revision
64 revision revision_text_bytes_diff bigint In revision events: change in bytes relative to parent revision (can be negative).
65 revision revision_text_sha1 string In revision events: sha1 hash of the revision
66 revision revision_content_model string In revision events: content model of revision
67 revision revision_content_format string In revision events: content format of revision
68 revision revision_is_deleted_by_page_deletion boolean In revision events: whether this revision has been deleted (moved to archive table)
69 revision revision_deleted_by_page_deletion_timestamp string In revision events: the timestamp when the revision was deleted
70 revision revision_is_identity_reverted boolean In revision events: whether this revision was reverted by another future revision
71 revision revision_first_identity_reverting_revision_id bigint In revision events: id of the revision that reverted this revision
72 revision revision_seconds_to_identity_revert bigint In revision events: seconds elapsed between revision posting and its revert (if there was one)
73 revision revision_is_identity_revert boolean In revision events: whether this revision reverts other revisions
74 revision revision_is_from_before_page_creation boolean In revision events: True if the revision timestamp is before the page creation (can happen with restore events)
75 revision revision_tags array<string> In revision events: Tags associated to the revision

View File

@ -2,26 +2,15 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'pyspark.sql.SparkSession'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[8], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpyspark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msql\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtypes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m StructType, StructField, StringType, LongType, BooleanType, IntegerType, ArrayType\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpyspark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msql\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m count, lit, desc\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyspark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msql\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mSparkSession\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mSparkSession\u001b[39;00m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pyspark.sql.SparkSession'"
]
}
],
"source": [ "source": [
"import re\n", "import re\n",
"import os\n",
"from pyspark.sql.types import StructType, StructField, StringType, LongType, BooleanType, IntegerType, ArrayType\n", "from pyspark.sql.types import StructType, StructField, StringType, LongType, BooleanType, IntegerType, ArrayType\n",
"from pyspark.sql.functions import count, lit, desc\n", "from pyspark.sql.functions import count, lit, desc\n",
"import pyspark.sql.SparkSession as SparkSession" "from pyspark.sql import SparkSession"
] ]
}, },
{ {
@ -30,7 +19,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"mediawiki_history_path = \"/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files\"" "os.environ['JAVA_HOME'] = \"/usr/lib/jvm/java-11-openjdk-amd64\"\n",
"os.environ['JRE_HOME'] = \"/usr/lib/jvm/java-11-openjdk-amd64/jre\""
] ]
}, },
{ {
@ -38,6 +28,15 @@
"execution_count": 3, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"mediawiki_history_path = \"/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [ "source": [
"# Note: string unescaping and array conversion is done later\n", "# Note: string unescaping and array conversion is done later\n",
"mediawiki_history_schema = StructType([\n", "mediawiki_history_schema = StructType([\n",
@ -125,19 +124,37 @@
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"ename": "AttributeError", "data": {
"evalue": "module 'pyspark' has no attribute 'read'", "text/plain": [
"output_type": "error", "'/usr/lib/jvm/java-11-openjdk-amd64'"
"traceback": [ ]
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", },
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "execution_count": 5,
"Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Note: It's important to set .option(\"quote\", \"\") to prevent spark to automaticallu use double-quotes to quote text\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m mediawiki_history_raw \u001b[38;5;241m=\u001b[39m \u001b[43mspark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[38;5;241m.\u001b[39moption(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdelimiter\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39moption(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquote\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mschema(mediawiki_history_schema)\u001b[38;5;241m.\u001b[39mcsv(mediawiki_history_path)\n", "metadata": {},
"\u001b[0;31mAttributeError\u001b[0m: module 'pyspark' has no attribute 'read'" "output_type": "execute_result"
}
],
"source": [
"os.environ['JAVA_HOME']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting default log level to \"WARN\".\n",
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
"25/01/08 11:39:19 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
] ]
} }
], ],
"source": [ "source": [
"spark = SparkSession\n", "spark = SparkSession.builder.appName('activityData').config(\"spark.driver.extraJavaOptions\", \"-Djava.home=/usr/lib/jvm/java-11-openjdk-amd64\").getOrCreate()\n",
"\n", "\n",
"# Note: It's important to set .option(\"quote\", \"\") to prevent spark to automaticallu use double-quotes to quote text\n", "# Note: It's important to set .option(\"quote\", \"\") to prevent spark to automaticallu use double-quotes to quote text\n",
"mediawiki_history_raw = spark.read.option(\"delimiter\", \"\\t\").option(\"quote\", \"\").schema(mediawiki_history_schema).csv(mediawiki_history_path)" "mediawiki_history_raw = spark.read.option(\"delimiter\", \"\\t\").option(\"quote\", \"\").schema(mediawiki_history_schema).csv(mediawiki_history_path)"
@ -145,7 +162,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -166,9 +183,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"<function __main__.toArray(str)>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"spark.udf.register(\"unescape\", unescape, StringType())\n", "spark.udf.register(\"unescape\", unescape, StringType())\n",
"spark.udf.register(\"to_array\", toArray, ArrayType(StringType(), False))" "spark.udf.register(\"to_array\", toArray, ArrayType(StringType(), False))"
@ -176,7 +204,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -261,9 +289,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------+-------+--------------+\n",
"|wiki_db |month |revision_count|\n",
"+--------------+-------+--------------+\n",
"|euwiki |2019-12|356355 |\n",
"|cewiki |2019-12|229351 |\n",
"|elwiktionary |2019-12|227666 |\n",
"|cywiki |2019-12|139174 |\n",
"|tgwiki |2019-12|65694 |\n",
"|zh_min_nanwiki|2019-12|59755 |\n",
"|bnwiki |2019-12|55698 |\n",
"|elwiki |2019-12|49604 |\n",
"|dewiktionary |2019-12|47897 |\n",
"|urwiki |2019-12|45793 |\n",
"+--------------+-------+--------------+\n",
"only showing top 10 rows\n",
"\n"
]
}
],
"source": [ "source": [
"mediawiki_history. \\\n", "mediawiki_history. \\\n",
" where(\"event_entity = 'revision' and event_type = 'create'\"). \\\n", " where(\"event_entity = 'revision' and event_type = 'create'\"). \\\n",

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -20,7 +20,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -942,7 +942,7 @@
" PosixPath('/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files/2024-11.zuwiktionary.all-time.tsv')]" " PosixPath('/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files/2024-11.zuwiktionary.all-time.tsv')]"
] ]
}, },
"execution_count": 3, "execution_count": 2,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -955,14 +955,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Created temporary directory at /tmp/mwhd-pandas.lpx0uvef\n" "Created temporary directory at /tmp/mwhd-pandas.js1u9wkd\n"
] ]
} }
], ],
@ -971,6 +971,780 @@
"tmpdir_path = pathlib.Path(tmpdir.name)\n", "tmpdir_path = pathlib.Path(tmpdir.name)\n",
"print(f\"Created temporary directory at {tmpdir_path}\")" "print(f\"Created temporary directory at {tmpdir_path}\")"
] ]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#load in csv fields\\\n",
"fields_file = pathlib.Path(\"activity_schema.csv\")\n",
"\n",
"CSV_FIELDS = []\n",
"CSV_FIELDS_META = {}\n",
"with fields_file.open(\"r\") as infile:\n",
" reader = csv.reader(infile, delimiter=\",\")\n",
"\n",
" # skip header\n",
" next(reader)\n",
"\n",
" for line in reader:\n",
" fclass = line[0]\n",
" fname = line[1]\n",
" dtype = line[2]\n",
" comment = line[3]\n",
"\n",
" CSV_FIELDS.append(fname)\n",
"\n",
" if dtype == \"int\":\n",
" dtype = \"Int64\"\n",
" elif dtype == \"bigint\":\n",
" dtype = \"Int64\"\n",
" elif dtype == \"array<string>\":\n",
" dtype = \"object\"\n",
"\n",
" if \"timestamp\" in fname:\n",
" dtype = \"object\"\n",
"\n",
" CSV_FIELDS_META[fname] = {\"class\": fclass, \"dtype\": dtype, \"comment\": comment}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id\tfield \tdtype\n",
"----\t------------------------------------------------------------\t------\n",
"1\twiki_db \tstring\n",
"2\tevent_entity \tstring\n",
"3\tevent_type \tstring\n",
"4\tevent_timestamp \tobject\n",
"5\tevent_comment \tstring\n",
"6\tevent_user_id \tInt64\n",
"7\tevent_user_text_historical \tstring\n",
"8\tevent_user_text \tstring\n",
"9\tevent_user_blocks_historical \tobject\n",
"10\tevent_user_blocks \tobject\n",
"11\tevent_user_groups_historical \tobject\n",
"12\tevent_user_groups \tobject\n",
"13\tevent_user_is_bot_by_historical \tobject\n",
"14\tevent_user_is_bot_by \tobject\n",
"15\tevent_user_is_created_by_self \tboolean\n",
"16\tevent_user_is_created_by_system \tboolean\n",
"17\tevent_user_is_created_by_peer \tboolean\n",
"18\tevent_user_is_anonymous \tboolean\n",
"19\tevent_user_is_temporary \tboolean\n",
"20\tevent_user_is_permanent \tboolean\n",
"21\tevent_user_registration_timestamp \tobject\n",
"22\tevent_user_creation_timestamp \tobject\n",
"23\tevent_user_first_edit_timestamp \tobject\n",
"24\tevent_user_revision_count \tInt64\n",
"25\tevent_user_seconds_since_previous_revision \tInt64\n",
"26\tpage_id \tInt64\n",
"27\tpage_title_historical \tstring\n",
"28\tpage_title \tstring\n",
"29\tpage_namespace_historical \tInt64\n",
"30\tpage_namespace_is_content_historical \tboolean\n",
"31\tpage_namespace \tInt64\n",
"32\tpage_namespace_is_content \tboolean\n",
"33\tpage_is_redirect \tboolean\n",
"34\tpage_is_deleted \tboolean\n",
"35\tpage_creation_timestamp \tobject\n",
"36\tpage_first_edit_timestamp \tobject\n",
"37\tpage_revision_count \tInt64\n",
"38\tpage_seconds_since_previous_revision \tInt64\n",
"39\tuser_id \tInt64\n",
"40\tuser_text_historical \tstring\n",
"41\tuser_text \tstring\n",
"42\tuser_blocks_historical \tobject\n",
"43\tuser_blocks \tobject\n",
"44\tuser_groups_historical \tobject\n",
"45\tuser_groups \tobject\n",
"46\tuser_is_bot_by_historical \tobject\n",
"47\tuser_is_bot_by \tobject\n",
"48\tuser_is_created_by_self \tboolean\n",
"49\tuser_is_created_by_system \tboolean\n",
"50\tuser_is_created_by_peer \tboolean\n",
"51\tuser_is_anonymous \tboolean\n",
"52\tuser_is_temporary \tboolean\n",
"53\tuser_is_permanent \tboolean\n",
"54\tuser_registration_timestamp \tobject\n",
"55\tuser_creation_timestamp \tobject\n",
"56\tuser_first_edit_timestamp \tobject\n",
"57\trevision_id \tInt64\n",
"58\trevision_parent_id \tInt64\n",
"59\trevision_minor_edit \tboolean\n",
"60\trevision_deleted_parts \tobject\n",
"61\trevision_deleted_parts_are_suppressed \tboolean\n",
"62\trevision_text_bytes \tInt64\n",
"63\trevision_text_bytes_diff \tInt64\n",
"64\trevision_text_sha1 \tstring\n",
"65\trevision_content_model \tstring\n",
"66\trevision_content_format \tstring\n",
"67\trevision_is_deleted_by_page_deletion \tboolean\n",
"68\trevision_deleted_by_page_deletion_timestamp \tobject\n",
"69\trevision_is_identity_reverted \tboolean\n",
"70\trevision_first_identity_reverting_revision_id \tInt64\n",
"71\trevision_seconds_to_identity_revert \tInt64\n",
"72\trevision_is_identity_revert \tboolean\n",
"73\trevision_is_from_before_page_creation \tboolean\n",
"74\trevision_tags \tobject\n",
"id\tfield \tdtype\n",
"----\t------------------------------------------------------------\t------\n",
"4\tevent_timestamp \tobject\n",
"21\tevent_user_registration_timestamp \tobject\n",
"22\tevent_user_creation_timestamp \tobject\n",
"23\tevent_user_first_edit_timestamp \tobject\n",
"35\tpage_creation_timestamp \tobject\n",
"36\tpage_first_edit_timestamp \tobject\n",
"54\tuser_registration_timestamp \tobject\n",
"55\tuser_creation_timestamp \tobject\n",
"56\tuser_first_edit_timestamp \tobject\n",
"68\trevision_deleted_by_page_deletion_timestamp \tobject\n"
]
}
],
"source": [
"# +\n",
"maxl = 60\n",
"\n",
"print(f\"id\\t{'field': <{maxl}}\\tdtype\")\n",
"print(\"----\\t\" + \"-\" * maxl + \"\\t\" + \"------\")\n",
"for id, field in enumerate(CSV_FIELDS, start=1):\n",
" print(f\"{id}\\t{field: <{maxl}}\\t{CSV_FIELDS_META[field]['dtype']}\")\n",
"\n",
"# +\n",
"timestamp_fields = [\n",
" (id, field) for id, field in enumerate(CSV_FIELDS, start=1) if \"timestamp\" in field\n",
"]\n",
"\n",
"print(f\"id\\t{'field': <{maxl}}\\tdtype\")\n",
"print(\"----\\t\" + \"-\" * maxl + \"\\t\" + \"------\")\n",
"for id, field in timestamp_fields:\n",
" print(f\"{id}\\t{field: <{maxl}}\\t{CSV_FIELDS_META[field]['dtype']}\")\n",
"\n",
"# +"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (14,15,16,17,27,29,31,37,38,41,42,43,44,45,46,47,48,49,50,51) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,18,19,39,40,41,42,43,44,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,41,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,13,14,15,16,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,13,17,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,17,42,43,44,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,17,19,39,40,41,42,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,19,39,40,41,42,43,44,50,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,27,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (41,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,11,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,10,14,15,16,17,39,41,42,43,44,51,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,13,14,15,16,17,27,29,31,37,38,39,40,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,14,15,16,17,42,43,44,51,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,39,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,11,12,13,14,15,16,17,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,42,51,63) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,59,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,14,15,16,17,27,29,30,31,37,38,42,43,44,45,46,47,48,49,50,51,54,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,39,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,17,39,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,17,19,39,40,41,42,43,44,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,20,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,13,14,15,16,17,39,40,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,14,15,16,17,39,40,41,42,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,17,39,41,42,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,17,39,40,41,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,19,39,40,41,42,43,44,49,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,41,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,14,15,16,17,18,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,11,17,41,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,17,41,42,43,44,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,17,39,41,42,43,44,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,13,14,15,16,17,27,29,31,37,38,39,40,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,49,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,14,15,16,17,27,29,30,31,37,38,42,43,44,45,46,47,48,49,50,51,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,14,15,16,17,18,19,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,43,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,17,39,40,41,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,17,42,43,44,63) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,17,39,40,42,43,44,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,11,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,59,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,9,11,30,39,40,42,51,54,56,59,62,63,64,67,68) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,11,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,59,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,13,17,41,42,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,14,15,16,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,19,39,40,41,42,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,12,13,14,15,16,17,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,59,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,18,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,40,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,13,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,17,41,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,17,18,19,39,40,41,42,43,44,49,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,17,18,19,39,40,41,42,43,44,49,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,18,19,27,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,11,39,40,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,11,12,13,17,30,39,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,13,14,15,16,17,39,40,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,17,41) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,13,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,39,40,41,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,39,40,42,43,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,20,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,18,19,39,40,41,42,43,44,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,20,27,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,18,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,41,43,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,14,15,16,17,39,40,41,42,43,44,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,17,39,40,41,42,43,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,14,15,16,17,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,10,11,14,15,16,17,27,29,30,31,37,38,39,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,19,39,40,41,42,43,44,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,19,39,40,41,42,43,44,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,18,19,39,40,41,42,43,44,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,14,15,16,17,27,29,30,31,37,38,39,40,42,43,44,45,46,47,48,49,50,51,54,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,12,13,14,15,16,17,18,19,20,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,17,42,43,44,63) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,41,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,17,39,40,41,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,14,15,16,17,39,40,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,13,17,39,40,41,42,43,44,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,39,40,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,11,12,13,14,15,16,17,18,19,20,27,29,31,37,38,41,42,43,44,45,46,47,48,49,50,51,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,20,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,14,15,16,17,27,29,30,31,37,38,39,40,42,43,44,45,46,47,48,49,50,51,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,19,39,40,41,42,49,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,39,40,41,42,43,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,17,39,40,41,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,19,39,40,41,42,43,44,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,14,15,16,17,27,37,38,39,40,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,11,39,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,14,15,16,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,18,19,27,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,17,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,17,42,43,44,51,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (14,15,16,17,42,43,44,63) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,17,39,40,42,43,44,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,19,39,40,42,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (13,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,13,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n",
"/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" tmpdf = pd.read_csv(\n"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"df_list = []\n",
"\n",
"for file in csv_files:\n",
" tmpdf = pd.read_csv(\n",
" file,\n",
" delimiter=\"\\t\",\n",
" encoding=\"utf-8\",\n",
" quotechar='\"',\n",
" quoting=csv.QUOTE_NONE,\n",
" doublequote=False,\n",
" header=None,\n",
" names=CSV_FIELDS,\n",
" #dtype={field: CSV_FIELDS_META[field][\"dtype\"] for field in CSV_FIELDS},\n",
" #date_format={\n",
" # field: \"%Y-%m-%d %H:%M:%S.%f\"\n",
" # for field in CSV_FIELDS\n",
" # if \"timestamp\" in field\n",
" #},\n",
" )\n",
" df_list.append(tmpdf)\n",
"\n",
"df = pd.concat(df_list)\n",
"df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {