diff --git a/src/helper_scripts/decompression_script.py b/src/helper_scripts/decompression_script.py index 24d30ed..d83c2ef 100644 --- a/src/helper_scripts/decompression_script.py +++ b/src/helper_scripts/decompression_script.py @@ -3,7 +3,7 @@ import bz2 import shutil import os -FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files" +FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/yearly_activity_files/" def decompress(filepath): decompressed_filepath = filepath[:-4] diff --git a/src/helper_scripts/dump_collector.py b/src/helper_scripts/dump_collector.py index 85f0a87..955258e 100644 --- a/src/helper_scripts/dump_collector.py +++ b/src/helper_scripts/dump_collector.py @@ -192,9 +192,10 @@ ALL_PROJECTS = [ DUMP = "2024-11" test_url = f"https://dumps.wikimedia.org/other/mediawiki_history/{DUMP}/aawiki/{DUMP}.aawiki.all-time.tsv.bz2" DUMP_LOC_PREFIX = f"https://dumps.wikimedia.org/other/mediawiki_history/{DUMP}/" -FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/compressed_yearly_activity_files/" +FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/compressed_monthly_activity_files/" YEARS = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024'] +MONTHS = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'] def download_file(url): print(f"Try to get {url}") @@ -209,14 +210,18 @@ def download_file(url): print(f"Downloaded file {filename}") return filename -def decompress(filepath): - decompressed_filepath = filepath[:-4] - with bz2.BZ2File(filepath) as fr, open(decompressed_filepath,"wb") as fw: - shutil.copyfileobj(fr,fw) - print(f"Decompressed {decompressed_filepath}") - os.remove(filepath) - print(f"Deleted {filepath}") +def batch_for_monthly(): + urls = [] + for entry in MONTHLY_PROJECTS: + for year in YEARS: + for month in MONTHS: + urls.append(f"{DUMP_LOC_PREFIX}{entry}/{DUMP}.{entry}.{year}-{month}.tsv.bz2") + for url in urls: + try: + download_file(url) + except Exception: + print(f"error! {url}") def batch_for_yearly(): urls = [] @@ -249,21 +254,6 @@ def batch_parallel_for_single(): except Exception as exc: print('generated an exception: %s' % (exc)) ''' - - -def decompress_directory(directory_name): - # Traverse the directory - for root, dirs, files in os.walk(directory_name): - for file in files: - if file.endswith('.bz2'): - # Full path to the file - filepath = os.path.join(root, file) - print(filepath) - # Apply the decompress function - decompress(filepath) if __name__ == "__main__": - batch_for_yearly() - #decompress_directory(FILE_LOC_PREFIX) - #file = download_file(test_url) - #decompress("/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/2024-11.zuwiktionary.all-time.tsv.bz2") \ No newline at end of file + batch_for_monthly() diff --git a/src/lib/spark-warehouse/activity_schema.csv b/src/lib/spark-warehouse/activity_schema.csv new file mode 100644 index 0000000..00bb72b --- /dev/null +++ b/src/lib/spark-warehouse/activity_schema.csv @@ -0,0 +1,75 @@ +Field class,Field name,Data type,Comment +Event_global,wiki_db,string,"enwiki, dewiki, eswiktionary, etc." +Event_global,event_entity,string,"revision, user or page" +Event_global,event_type,string,"create, move, delete, etc. Detailed explanation in the docs under #Event_types" +Event_global,event_timestamp,string,When this event ocurred +Event_global,event_comment,string,"Comment related to this event, sourced from log_comment, rev_comment, etc." +Event user,event_user_id,bigint,ID of the user that caused the event. Null if the user is anonymous or if from a revision where the user has been revision deleted. +Event user,event_user_text_historical,string,Historical username (IP address for anonymous user) of the user that caused the event. Null for revisions where the user has been revision deleted. +Event user,event_user_text,string,Current username of the user that caused the event. Null for anonymous users (the IP is stored in event_user_text_historical). Null for revisions where the user has been revision deleted. +Event user,event_user_blocks_historical,array,Historical blocks of the user that caused the event +Event user,event_user_blocks,array,Current blocks of the user that caused the event +Event user,event_user_groups_historical,array,Historical groups of the user that caused the event +Event user,event_user_groups,array,Current groups of the user that caused the event +Event user,event_user_is_bot_by_historical,array,"Historical bot information of the user that caused the event, can contain values name or group" +Event user,event_user_is_bot_by,array,"Bot information of the user that caused the event, can contain values name or group" +Event user,event_user_is_created_by_self,boolean,Whether the event_user created their own account +Event user,event_user_is_created_by_system,boolean,Whether the event_user account was created by mediawiki (eg. centralauth) +Event user,event_user_is_created_by_peer,boolean,Whether the event_user account was created by another user +Event user,event_user_is_anonymous,boolean,"Whether the event_user is not registered, using the old way that surfaced the IP publicly. True for revisions where the user has been revision deleted, even if the user was actually registered." +Event user,event_user_is_temporary,boolean,"Whether the event_user is not registered, using the new temporary account way. True for revisions where the user has been revision deleted, even if the user was actually registered." +Event user,event_user_is_permanent,boolean,Whether the event_user is registered. +Event user,event_user_registration_timestamp,string,Registration timestamp of the user that caused the event (from user table) +Event user,event_user_creation_timestamp,string,Creation timestamp of the user that caused the event (from logging table) +Event user,event_user_first_edit_timestamp,string,Timestamp of the first edit of the user that caused the event +Event user,event_user_revision_count,bigint,"Number of revisions made by the event_user up to the historical time in this wiki_db (only available in revision-create events so far). For revision-create events, this includes the event itself." +Event user,event_user_seconds_since_previous_revision,bigint,In revision events: seconds elapsed since the previous revision made by the current event_user_id (only available in revision-create events so far) +page,page_id,bigint,In revision/page events: id of the page +page,page_title_historical,string,In revision/page events: historical title of the page +page,page_title,string,In revision/page events: current title of the page +page,page_namespace_historical,int,In revision/page events: historical namespace of the page. +page,page_namespace_is_content_historical,boolean,In revision/page events: historical namespace of the page is categorized as content +page,page_namespace,int,In revision/page events: current namespace of the page +page,page_namespace_is_content,boolean,In revision/page events: current namespace of the page is categorized as content +page,page_is_redirect,boolean,In revision/page events: whether the page is currently a redirect +page,page_is_deleted,boolean,In revision/page events: Whether the page is rebuilt from a delete event +page,page_creation_timestamp,string,In revision/page events: creation timestamp of the page +page,page_first_edit_timestamp,string,In revision/page events: timestamp of the page's first revision. Can be before the page_creation in some restore/merge cases (see revision_is_from_before_page_creation). +page,page_revision_count,bigint,In revision/page events: Cumulative revision count per page for the current page_id (only available in revision-create events so far) +page,page_seconds_since_previous_revision,bigint,In revision/page events: seconds elapsed since the previous revision made on the current page_id (only available in revision-create events so far) +user,user_id,bigint,In user events: id of the user +user,user_text_historical,string,In user events: historical username or IP address of the user +user,user_text,string,In user events: current username or IP address of the user +user,user_blocks_historical,array,In user events: historical user blocks +user,user_blocks,array,In user events: current user blocks +user,user_groups_historical,array,In user events: historical user groups +user,user_groups,array,In user events: current user groups +user,user_is_bot_by_historical,array,"In user events: Historical bot information of the user, can contain values name or group" +user,user_is_bot_by,array,"In user events: Bot information of the user, can contain values name or group" +user,user_is_created_by_self,boolean,In user events: whether the user created their own account +user,user_is_created_by_system,boolean,In user events: whether the user account was created by mediawiki +user,user_is_created_by_peer,boolean,In user events: whether the user account was created by another user +user,user_is_anonymous,boolean,"In user events: whether the user is not registered, using the old way that surfaced the IP publicly" +user,user_is_temporary,boolean,"In user events: whether the user is not registered, using the new temporary account way" +user,user_is_permanent,boolean,In user events: whether the user is registered +user,user_registration_timestamp,string,In user events: registration timestamp of the user. +user,user_creation_timestamp,string,In user events: Creation timestamp of the user (from logging table) +user,user_first_edit_timestamp,string,In user events: Timestamp of the first edit of the user +revision,revision_id,bigint,In revision events: id of the revision +revision,revision_parent_id,bigint,In revision events: id of the parent revision +revision,revision_minor_edit,boolean,In revision events: whether it is a minor edit or not +revision,revision_deleted_parts,array,"In revision events: Deleted parts of the revision, can contain values text, comment and user" +revision,revision_deleted_parts_are_suppressed,boolean,In revision events: Whether the deleted parts are deleted to admin as well (visible only by stewards) +revision,revision_text_bytes,bigint,In revision events: number of bytes of revision +revision,revision_text_bytes_diff,bigint,In revision events: change in bytes relative to parent revision (can be negative). +revision,revision_text_sha1,string,In revision events: sha1 hash of the revision +revision,revision_content_model,string,In revision events: content model of revision +revision,revision_content_format,string,In revision events: content format of revision +revision,revision_is_deleted_by_page_deletion,boolean,In revision events: whether this revision has been deleted (moved to archive table) +revision,revision_deleted_by_page_deletion_timestamp,string,In revision events: the timestamp when the revision was deleted +revision,revision_is_identity_reverted,boolean,In revision events: whether this revision was reverted by another future revision +revision,revision_first_identity_reverting_revision_id,bigint,In revision events: id of the revision that reverted this revision +revision,revision_seconds_to_identity_revert,bigint,In revision events: seconds elapsed between revision posting and its revert (if there was one) +revision,revision_is_identity_revert,boolean,In revision events: whether this revision reverts other revisions +revision,revision_is_from_before_page_creation,boolean,In revision events: True if the revision timestamp is before the page creation (can happen with restore events) +revision,revision_tags,array,In revision events: Tags associated to the revision diff --git a/src/lib/spark-warehouse/bot_isolation.ipynb b/src/lib/spark-warehouse/bot_isolation.ipynb index ed551f5..40ac30a 100644 --- a/src/lib/spark-warehouse/bot_isolation.ipynb +++ b/src/lib/spark-warehouse/bot_isolation.ipynb @@ -2,26 +2,15 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'pyspark.sql.SparkSession'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[8], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpyspark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msql\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtypes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m StructType, StructField, StringType, LongType, BooleanType, IntegerType, ArrayType\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpyspark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msql\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m count, lit, desc\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyspark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msql\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mSparkSession\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mSparkSession\u001b[39;00m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pyspark.sql.SparkSession'" - ] - } - ], + "outputs": [], "source": [ "import re\n", + "import os\n", "from pyspark.sql.types import StructType, StructField, StringType, LongType, BooleanType, IntegerType, ArrayType\n", "from pyspark.sql.functions import count, lit, desc\n", - "import pyspark.sql.SparkSession as SparkSession" + "from pyspark.sql import SparkSession" ] }, { @@ -30,7 +19,8 @@ "metadata": {}, "outputs": [], "source": [ - "mediawiki_history_path = \"/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files\"" + "os.environ['JAVA_HOME'] = \"/usr/lib/jvm/java-11-openjdk-amd64\"\n", + "os.environ['JRE_HOME'] = \"/usr/lib/jvm/java-11-openjdk-amd64/jre\"" ] }, { @@ -38,6 +28,15 @@ "execution_count": 3, "metadata": {}, "outputs": [], + "source": [ + "mediawiki_history_path = \"/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], "source": [ "# Note: string unescaping and array conversion is done later\n", "mediawiki_history_schema = StructType([\n", @@ -125,19 +124,37 @@ "metadata": {}, "outputs": [ { - "ename": "AttributeError", - "evalue": "module 'pyspark' has no attribute 'read'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Note: It's important to set .option(\"quote\", \"\") to prevent spark to automaticallu use double-quotes to quote text\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m mediawiki_history_raw \u001b[38;5;241m=\u001b[39m \u001b[43mspark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[38;5;241m.\u001b[39moption(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdelimiter\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39moption(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquote\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mschema(mediawiki_history_schema)\u001b[38;5;241m.\u001b[39mcsv(mediawiki_history_path)\n", - "\u001b[0;31mAttributeError\u001b[0m: module 'pyspark' has no attribute 'read'" + "data": { + "text/plain": [ + "'/usr/lib/jvm/java-11-openjdk-amd64'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.environ['JAVA_HOME']" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", + "25/01/08 11:39:19 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" ] } ], "source": [ - "spark = SparkSession\n", + "spark = SparkSession.builder.appName('activityData').config(\"spark.driver.extraJavaOptions\", \"-Djava.home=/usr/lib/jvm/java-11-openjdk-amd64\").getOrCreate()\n", "\n", "# Note: It's important to set .option(\"quote\", \"\") to prevent spark to automaticallu use double-quotes to quote text\n", "mediawiki_history_raw = spark.read.option(\"delimiter\", \"\\t\").option(\"quote\", \"\").schema(mediawiki_history_schema).csv(mediawiki_history_path)" @@ -145,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -166,9 +183,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "spark.udf.register(\"unescape\", unescape, StringType())\n", "spark.udf.register(\"to_array\", toArray, ArrayType(StringType(), False))" @@ -176,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -261,9 +289,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------+-------+--------------+\n", + "|wiki_db |month |revision_count|\n", + "+--------------+-------+--------------+\n", + "|euwiki |2019-12|356355 |\n", + "|cewiki |2019-12|229351 |\n", + "|elwiktionary |2019-12|227666 |\n", + "|cywiki |2019-12|139174 |\n", + "|tgwiki |2019-12|65694 |\n", + "|zh_min_nanwiki|2019-12|59755 |\n", + "|bnwiki |2019-12|55698 |\n", + "|elwiki |2019-12|49604 |\n", + "|dewiktionary |2019-12|47897 |\n", + "|urwiki |2019-12|45793 |\n", + "+--------------+-------+--------------+\n", + "only showing top 10 rows\n", + "\n" + ] + } + ], "source": [ "mediawiki_history. \\\n", " where(\"event_entity = 'revision' and event_type = 'create'\"). \\\n", diff --git a/src/lib/spark-warehouse/pandas_bot_isolation.ipynb b/src/lib/spark-warehouse/pandas_bot_isolation.ipynb index a9b2680..583decc 100644 --- a/src/lib/spark-warehouse/pandas_bot_isolation.ipynb +++ b/src/lib/spark-warehouse/pandas_bot_isolation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -942,7 +942,7 @@ " PosixPath('/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/single_activity_files/2024-11.zuwiktionary.all-time.tsv')]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -955,14 +955,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Created temporary directory at /tmp/mwhd-pandas.lpx0uvef\n" + "Created temporary directory at /tmp/mwhd-pandas.js1u9wkd\n" ] } ], @@ -971,6 +971,780 @@ "tmpdir_path = pathlib.Path(tmpdir.name)\n", "print(f\"Created temporary directory at {tmpdir_path}\")" ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#load in csv fields\\\n", + "fields_file = pathlib.Path(\"activity_schema.csv\")\n", + "\n", + "CSV_FIELDS = []\n", + "CSV_FIELDS_META = {}\n", + "with fields_file.open(\"r\") as infile:\n", + " reader = csv.reader(infile, delimiter=\",\")\n", + "\n", + " # skip header\n", + " next(reader)\n", + "\n", + " for line in reader:\n", + " fclass = line[0]\n", + " fname = line[1]\n", + " dtype = line[2]\n", + " comment = line[3]\n", + "\n", + " CSV_FIELDS.append(fname)\n", + "\n", + " if dtype == \"int\":\n", + " dtype = \"Int64\"\n", + " elif dtype == \"bigint\":\n", + " dtype = \"Int64\"\n", + " elif dtype == \"array\":\n", + " dtype = \"object\"\n", + "\n", + " if \"timestamp\" in fname:\n", + " dtype = \"object\"\n", + "\n", + " CSV_FIELDS_META[fname] = {\"class\": fclass, \"dtype\": dtype, \"comment\": comment}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id\tfield \tdtype\n", + "----\t------------------------------------------------------------\t------\n", + "1\twiki_db \tstring\n", + "2\tevent_entity \tstring\n", + "3\tevent_type \tstring\n", + "4\tevent_timestamp \tobject\n", + "5\tevent_comment \tstring\n", + "6\tevent_user_id \tInt64\n", + "7\tevent_user_text_historical \tstring\n", + "8\tevent_user_text \tstring\n", + "9\tevent_user_blocks_historical \tobject\n", + "10\tevent_user_blocks \tobject\n", + "11\tevent_user_groups_historical \tobject\n", + "12\tevent_user_groups \tobject\n", + "13\tevent_user_is_bot_by_historical \tobject\n", + "14\tevent_user_is_bot_by \tobject\n", + "15\tevent_user_is_created_by_self \tboolean\n", + "16\tevent_user_is_created_by_system \tboolean\n", + "17\tevent_user_is_created_by_peer \tboolean\n", + "18\tevent_user_is_anonymous \tboolean\n", + "19\tevent_user_is_temporary \tboolean\n", + "20\tevent_user_is_permanent \tboolean\n", + "21\tevent_user_registration_timestamp \tobject\n", + "22\tevent_user_creation_timestamp \tobject\n", + "23\tevent_user_first_edit_timestamp \tobject\n", + "24\tevent_user_revision_count \tInt64\n", + "25\tevent_user_seconds_since_previous_revision \tInt64\n", + "26\tpage_id \tInt64\n", + "27\tpage_title_historical \tstring\n", + "28\tpage_title \tstring\n", + "29\tpage_namespace_historical \tInt64\n", + "30\tpage_namespace_is_content_historical \tboolean\n", + "31\tpage_namespace \tInt64\n", + "32\tpage_namespace_is_content \tboolean\n", + "33\tpage_is_redirect \tboolean\n", + "34\tpage_is_deleted \tboolean\n", + "35\tpage_creation_timestamp \tobject\n", + "36\tpage_first_edit_timestamp \tobject\n", + "37\tpage_revision_count \tInt64\n", + "38\tpage_seconds_since_previous_revision \tInt64\n", + "39\tuser_id \tInt64\n", + "40\tuser_text_historical \tstring\n", + "41\tuser_text \tstring\n", + "42\tuser_blocks_historical \tobject\n", + "43\tuser_blocks \tobject\n", + "44\tuser_groups_historical \tobject\n", + "45\tuser_groups \tobject\n", + "46\tuser_is_bot_by_historical \tobject\n", + "47\tuser_is_bot_by \tobject\n", + "48\tuser_is_created_by_self \tboolean\n", + "49\tuser_is_created_by_system \tboolean\n", + "50\tuser_is_created_by_peer \tboolean\n", + "51\tuser_is_anonymous \tboolean\n", + "52\tuser_is_temporary \tboolean\n", + "53\tuser_is_permanent \tboolean\n", + "54\tuser_registration_timestamp \tobject\n", + "55\tuser_creation_timestamp \tobject\n", + "56\tuser_first_edit_timestamp \tobject\n", + "57\trevision_id \tInt64\n", + "58\trevision_parent_id \tInt64\n", + "59\trevision_minor_edit \tboolean\n", + "60\trevision_deleted_parts \tobject\n", + "61\trevision_deleted_parts_are_suppressed \tboolean\n", + "62\trevision_text_bytes \tInt64\n", + "63\trevision_text_bytes_diff \tInt64\n", + "64\trevision_text_sha1 \tstring\n", + "65\trevision_content_model \tstring\n", + "66\trevision_content_format \tstring\n", + "67\trevision_is_deleted_by_page_deletion \tboolean\n", + "68\trevision_deleted_by_page_deletion_timestamp \tobject\n", + "69\trevision_is_identity_reverted \tboolean\n", + "70\trevision_first_identity_reverting_revision_id \tInt64\n", + "71\trevision_seconds_to_identity_revert \tInt64\n", + "72\trevision_is_identity_revert \tboolean\n", + "73\trevision_is_from_before_page_creation \tboolean\n", + "74\trevision_tags \tobject\n", + "id\tfield \tdtype\n", + "----\t------------------------------------------------------------\t------\n", + "4\tevent_timestamp \tobject\n", + "21\tevent_user_registration_timestamp \tobject\n", + "22\tevent_user_creation_timestamp \tobject\n", + "23\tevent_user_first_edit_timestamp \tobject\n", + "35\tpage_creation_timestamp \tobject\n", + "36\tpage_first_edit_timestamp \tobject\n", + "54\tuser_registration_timestamp \tobject\n", + "55\tuser_creation_timestamp \tobject\n", + "56\tuser_first_edit_timestamp \tobject\n", + "68\trevision_deleted_by_page_deletion_timestamp \tobject\n" + ] + } + ], + "source": [ + "# +\n", + "maxl = 60\n", + "\n", + "print(f\"id\\t{'field': <{maxl}}\\tdtype\")\n", + "print(\"----\\t\" + \"-\" * maxl + \"\\t\" + \"------\")\n", + "for id, field in enumerate(CSV_FIELDS, start=1):\n", + " print(f\"{id}\\t{field: <{maxl}}\\t{CSV_FIELDS_META[field]['dtype']}\")\n", + "\n", + "# +\n", + "timestamp_fields = [\n", + " (id, field) for id, field in enumerate(CSV_FIELDS, start=1) if \"timestamp\" in field\n", + "]\n", + "\n", + "print(f\"id\\t{'field': <{maxl}}\\tdtype\")\n", + "print(\"----\\t\" + \"-\" * maxl + \"\\t\" + \"------\")\n", + "for id, field in timestamp_fields:\n", + " print(f\"{id}\\t{field: <{maxl}}\\t{CSV_FIELDS_META[field]['dtype']}\")\n", + "\n", + "# +" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (14,15,16,17,27,29,31,37,38,41,42,43,44,45,46,47,48,49,50,51) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,18,19,39,40,41,42,43,44,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,41,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,13,14,15,16,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,13,17,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,17,42,43,44,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,17,19,39,40,41,42,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,19,39,40,41,42,43,44,50,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,27,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (41,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,11,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,10,14,15,16,17,39,41,42,43,44,51,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,13,14,15,16,17,27,29,31,37,38,39,40,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,14,15,16,17,42,43,44,51,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,39,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,11,12,13,14,15,16,17,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,42,51,63) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,59,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,14,15,16,17,27,29,30,31,37,38,42,43,44,45,46,47,48,49,50,51,54,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,39,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,17,39,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,17,19,39,40,41,42,43,44,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,20,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,13,14,15,16,17,39,40,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,14,15,16,17,39,40,41,42,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,17,39,41,42,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,17,39,40,41,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,19,39,40,41,42,43,44,49,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,41,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,14,15,16,17,18,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,11,17,41,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,17,41,42,43,44,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,17,39,41,42,43,44,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,13,14,15,16,17,27,29,31,37,38,39,40,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,49,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,14,15,16,17,27,29,30,31,37,38,42,43,44,45,46,47,48,49,50,51,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,14,15,16,17,18,19,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,43,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,17,39,40,41,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,17,42,43,44,63) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,17,39,40,42,43,44,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,11,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,59,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,9,11,30,39,40,42,51,54,56,59,62,63,64,67,68) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,11,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,59,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,13,17,41,42,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,14,15,16,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,19,39,40,41,42,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,8,9,10,12,13,14,15,16,17,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,59,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,18,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,40,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,13,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,17,41,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,17,18,19,39,40,41,42,43,44,49,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,17,18,19,39,40,41,42,43,44,49,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,18,19,27,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,11,39,40,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (4,11,12,13,17,30,39,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,13,14,15,16,17,39,40,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,17,41) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,13,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,39,40,41,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,39,40,42,43,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,20,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,18,19,39,40,41,42,43,44,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,20,27,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,18,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,41,43,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,14,15,16,17,39,40,41,42,43,44,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,17,39,40,41,42,43,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,12,14,15,16,17,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,10,11,14,15,16,17,27,29,30,31,37,38,39,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,19,39,40,41,42,43,44,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,19,39,40,41,42,43,44,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,27,29,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,18,19,39,40,41,42,43,44,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,14,15,16,17,27,29,30,31,37,38,39,40,42,43,44,45,46,47,48,49,50,51,54,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,11,12,13,14,15,16,17,18,19,20,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,19,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,17,42,43,44,63) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,41,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,17,39,40,41,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,39,40,41,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,54,55,56,62,63,64,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,14,15,16,17,39,40,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,13,17,39,40,41,42,43,44,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,39,40,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,11,12,13,14,15,16,17,18,19,20,27,29,31,37,38,41,42,43,44,45,46,47,48,49,50,51,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,20,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,14,15,16,17,27,29,30,31,37,38,39,40,42,43,44,45,46,47,48,49,50,51,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,13,14,15,16,17,27,29,30,31,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,19,39,40,41,42,49,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (42,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,39,40,41,42,43,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,17,39,40,41,42,43,44,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,39,40,41,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,14,15,16,17,19,39,40,41,42,43,44,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,11,12,14,15,16,17,27,37,38,39,40,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,11,39,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,12,14,15,16,17,39,40,41,42,43,44,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,14,15,16,17,18,19,27,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,41,42,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,11,12,13,14,15,16,17,18,19,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,39,40,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,12,13,17,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,17,42,43,44,51,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (14,15,16,17,42,43,44,63) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (17,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (10,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,12,17,39,40,42,43,44,63,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,19,39,40,42,50,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (11,17,42,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (12,13,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (13,17,42,43,44,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,10,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,17,39,40,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (9,17,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,12,13,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,39,40,41,42,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n", + "/tmp/ipykernel_1030433/720178351.py:4: DtypeWarning: Columns (8,9,10,12,17,39,40,41,42,43,44,55,69) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " tmpdf = pd.read_csv(\n" + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", + "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", + "\u001b[1;31mClick here for more info. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], + "source": [ + "df_list = []\n", + "\n", + "for file in csv_files:\n", + " tmpdf = pd.read_csv(\n", + " file,\n", + " delimiter=\"\\t\",\n", + " encoding=\"utf-8\",\n", + " quotechar='\"',\n", + " quoting=csv.QUOTE_NONE,\n", + " doublequote=False,\n", + " header=None,\n", + " names=CSV_FIELDS,\n", + " #dtype={field: CSV_FIELDS_META[field][\"dtype\"] for field in CSV_FIELDS},\n", + " #date_format={\n", + " # field: \"%Y-%m-%d %H:%M:%S.%f\"\n", + " # for field in CSV_FIELDS\n", + " # if \"timestamp\" in field\n", + " #},\n", + " )\n", + " df_list.append(tmpdf)\n", + "\n", + "df = pd.concat(df_list)\n", + "df.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {