{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Connection settings come from the environment so credentials can live outside the notebook.\n",
    "# The fallbacks mirror the values that used to be hard-coded in each function.\n",
    "# TODO: drop the password fallback once DB_PASSWORD is set wherever this notebook runs.\n",
    "DB_CONFIG = {\n",
    "    \"host\": os.environ.get(\"DB_HOST\", \"localhost\"),\n",
    "    \"user\": os.environ.get(\"DB_USER\", \"root\"),\n",
    "    \"password\": os.environ.get(\"DB_PASSWORD\", \"pleasehashapasswordomg\"),\n",
    "    \"database\": os.environ.get(\"DB_NAME\", \"default\"),\n",
    "}\n",
    "\n",
    "insert_query = \"INSERT IGNORE INTO default.TRANSACTION (ID, TDATE, ACCOUNTID, MEMO, COUNTRY, OUTFLOW, INFLOW, OWNERID, INSTALLMENT_NR, INSTALLMENT_TT, CREATED, UPDATED) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_owner(queried_name: str):\n",
    "    \"\"\"Look up an owner by name (case-insensitive) in the OWNER table.\n",
    "\n",
    "    Returns a flat list ``[id, name, ...]`` built from the first two columns of\n",
    "    every matching row, or ``None`` when nothing matches or the query fails\n",
    "    (database errors are printed, not raised).\n",
    "    \"\"\"\n",
    "    from mysql.connector import connect, Error\n",
    "\n",
    "    query = \"SELECT * FROM OWNER\"\n",
    "    wanted = queried_name.lower()\n",
    "    result = []\n",
    "\n",
    "    try:\n",
    "        # Both context managers release their resource on exit, so there is no\n",
    "        # explicit close() that could fail when connect() itself raises.\n",
    "        with connect(**DB_CONFIG) as connection:\n",
    "            with connection.cursor() as cursor:\n",
    "                cursor.execute(query)\n",
    "                for item in cursor.fetchall():\n",
    "                    if item[1] is not None and item[1].lower() == wanted:\n",
    "                        result.append(item[0])\n",
    "                        result.append(item[1])\n",
    "    except Error as e:\n",
    "        print(e)\n",
    "\n",
    "    return result if result else None\n",
    "\n",
    "\n",
    "# Quick manual check; shows None instead of raising when the owner is missing.\n",
    "owner = find_owner(\"daniel\")\n",
    "owner[0] if owner else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# OUROCARD_VISA_INFINITE-Próxima_Fatura.txt\n",
    "# OUROCARD_VISA_INFINITE-Ago_24.txt\n",
    "def create_lists(file_path: str = \"OUROCARD_VISA_INFINITE-Ago_24.txt\"):\n",
    "    \"\"\"Group the transaction lines of an invoice text export by card owner.\n",
    "\n",
    "    Args:\n",
    "        file_path: path of the latin-1 encoded invoice export to read.\n",
    "\n",
    "    Returns:\n",
    "        A dict keyed by ``list_<owner>``. Each value holds ``owner_name``,\n",
    "        ``owner_id`` (from find_owner), ``tlist`` (the raw matching lines) and\n",
    "        ``isPartial`` (True unless the rows use the full dd.mm.yyyy layout).\n",
    "    \"\"\"\n",
    "    import re\n",
    "\n",
    "    with open(file_path, \"r\", encoding=\"latin\") as file:\n",
    "        contents = [line.strip() for line in file]\n",
    "\n",
    "    # Define the regex patterns\n",
    "    owner_pattern = r\"\\d\\s?-\\s?([A-Z]+)\"\n",
    "    line_pattern = r\"\\d{2}\\.\\d{2}\\.\\d{4}.{23}.{14}.{2}\\s*-?\\d*\\.?\\d+,\\d{2}\\s*\\d+,\\d{2}\"\n",
    "    payment_pattern = r\"\\d{2}\\.\\d{2}\\.\\d{4}PGTO.*200211(\\s*-?\\d*\\.?\\d+,\\d{2})(\\s*\\d+,\\d{2})\"\n",
    "    partial_invoice_line_pattern = r\"\\d{2}\\/\\d{2}.{27}.{16}.{2}\\s+\\s*-?\\d*\\.?\\d+,\\d{2}\\s*\\d+,\\d{2}\"\n",
    "    transaction_patterns = (payment_pattern, line_pattern, partial_invoice_line_pattern)\n",
    "\n",
    "    result = {}\n",
    "    current_list = None\n",
    "\n",
    "    for line in contents:\n",
    "        header = re.match(owner_pattern, line)\n",
    "        if header:\n",
    "            # An owner header opens a new section; later rows belong to it.\n",
    "            owner_name = header.group(1)\n",
    "            current_list = f\"list_{owner_name.lower()}\"\n",
    "            if current_list not in result:\n",
    "                owner = find_owner(owner_name.lower())\n",
    "                if not owner:\n",
    "                    # Without an owner id the rows cannot be inserted, so the\n",
    "                    # section is skipped instead of aborting the whole file.\n",
    "                    print(f\"Error during owner search: {owner_name} not found\")\n",
    "                    current_list = None\n",
    "                    continue\n",
    "                result[current_list] = {\n",
    "                    \"owner_name\": owner_name,\n",
    "                    \"owner_id\": owner[0],\n",
    "                    \"tlist\": [],\n",
    "                }\n",
    "        elif current_list is not None and any(re.match(p, line) for p in transaction_patterns):\n",
    "            result[current_list][\"tlist\"].append(line)\n",
    "\n",
    "    # Check file pattern: a first row matching line_pattern means a full invoice;\n",
    "    # anything else (including no rows at all) is treated as partial.\n",
    "    sample = next((entry[\"tlist\"][0] for entry in result.values() if entry[\"tlist\"]), None)\n",
    "    isPartial = not (sample is not None and re.match(line_pattern, sample))\n",
    "\n",
    "    for entry in result.values():\n",
    "        entry[\"isPartial\"] = isPartial\n",
    "\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_insert(input_dict: dict, account: int):\n",
    "    \"\"\"Turn the output of create_lists() into row tuples for insert_query.\n",
    "\n",
    "    Args:\n",
    "        input_dict: mapping whose values carry ``isPartial``, ``tlist`` and ``owner_id``.\n",
    "        account: value stored in the ACCOUNTID column of every row.\n",
    "\n",
    "    Returns:\n",
    "        A list of 12-item tuples in the column order used by insert_query.\n",
    "        Lines that match none of the layouts are reported and skipped.\n",
    "    \"\"\"\n",
    "    from datetime import date, datetime\n",
    "    import re\n",
    "    import hashlib\n",
    "\n",
    "    # RegEx Patterns (group names are the ones looked up with .group() below)\n",
    "    line_group_pattern = r\"(?P<day>\\d{2})\\.(?P<month>\\d{2})\\.(?P<year>\\d{4})(?:(?P<p_memo>.+PARC (?P<p_nr>\\d+.)\\/(?P<p_tt>\\d+)\\s.{12})|(?P<memo>.{37}))(?P<country>.{2})(?P<outflow>\\s*-?\\d*\\.?\\d+,\\d{2})(?P<inflow>\\s*\\d*\\.?\\d+,\\d{2})\"\n",
    "    partial_invoice_group_pattern = r\"(?P<day>\\d{2})\\/(?P<month>\\d{2})(?:(?P<p_memo>.+PARC (?P<p_nr>\\d{2})\\/(?P<p_tt>\\d{2}).{15})|(?P<memo>.{43}))(?P<country>.{2})(?P<outflow>\\s+\\s*-?\\d*\\.?\\d+,\\d{2})(?P<inflow>\\s*\\d+,\\d{2})\"\n",
    "    payment_pattern = r\"(?P<day>\\d{2})\\.(?P<month>\\d{2})\\.(?P<year>\\d{4})(?P<memo>PGTO DEBITO CONTA).*200211(?P<inflow>\\s*-?\\d*\\.?\\d+,\\d{2})(?P<outflow>\\s*\\d+,\\d{2})\"\n",
    "\n",
    "    def to_decimal(raw):\n",
    "        # \"1.234,56\" -> \"1234.56\": drop thousands separators, use a decimal point\n",
    "        return raw.strip().replace(\".\", \"\").replace(\",\", \".\")\n",
    "\n",
    "    insert_bulk = []\n",
    "\n",
    "    for entry in input_dict.values():\n",
    "        is_partial = entry[\"isPartial\"]\n",
    "        pattern_to_use = partial_invoice_group_pattern if is_partial else line_group_pattern\n",
    "\n",
    "        for item in entry.get(\"tlist\", []):\n",
    "            # check for payment\n",
    "            matches = re.match(payment_pattern, item)\n",
    "            if matches:\n",
    "                year = int(matches.group(\"year\"))\n",
    "                tMemo = matches.group(\"memo\")\n",
    "                tCountry = None\n",
    "                tOutflow = \"0.00\"\n",
    "                # drop any minus sign so the inflow is stored unsigned\n",
    "                tInflow = to_decimal(matches.group(\"inflow\")).replace(\"-\", \"\")\n",
    "                tInstallmentNr = None\n",
    "                tInstallmentTt = None\n",
    "            else:\n",
    "                matches = re.match(pattern_to_use, item)\n",
    "                if matches is None:\n",
    "                    print(f\"Skipping line that does not match the expected layout: {item!r}\")\n",
    "                    continue\n",
    "\n",
    "                # partial files will not have the year data on transactions\n",
    "                year = datetime.now().year if is_partial else int(matches.group(\"year\"))\n",
    "\n",
    "                tMemo = matches.group(\"p_memo\") if matches.group(\"p_memo\") else matches.group(\"memo\")\n",
    "                tInstallmentNr = int(matches.group(\"p_nr\")) if matches.group(\"p_nr\") else None\n",
    "                tInstallmentTt = int(matches.group(\"p_tt\")) if matches.group(\"p_tt\") else None\n",
    "\n",
    "                tCountry = matches.group(\"country\")\n",
    "                tOutflow = to_decimal(matches.group(\"outflow\"))\n",
    "                tInflow = to_decimal(matches.group(\"inflow\"))\n",
    "\n",
    "            tTdate = str(date(year, int(matches.group(\"month\")), int(matches.group(\"day\"))))\n",
    "            tCreated = str(datetime.now(tz=None))\n",
    "\n",
    "            # The row id is a hash of date + memo + amounts, so re-importing the\n",
    "            # same line yields the same id and INSERT IGNORE leaves it alone.\n",
    "            preHash = tTdate + tMemo + tOutflow + tInflow\n",
    "            tId = hashlib.sha256(preHash.encode()).hexdigest()\n",
    "\n",
    "            insert_bulk.append(\n",
    "                (\n",
    "                    tId,\n",
    "                    tTdate,\n",
    "                    account,\n",
    "                    tMemo,\n",
    "                    tCountry,\n",
    "                    tOutflow,\n",
    "                    tInflow,\n",
    "                    entry[\"owner_id\"],\n",
    "                    tInstallmentNr,\n",
    "                    tInstallmentTt,\n",
    "                    tCreated,\n",
    "                    None,  # UPDATED stays empty on first insert\n",
    "                )\n",
    "            )\n",
    "\n",
    "    return insert_bulk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def db_insert(insert_bulk: list[tuple]):\n",
    "    \"\"\"Run insert_query for every tuple in insert_bulk and commit.\n",
    "\n",
    "    Database errors are printed, not raised.\n",
    "    \"\"\"\n",
    "    from mysql.connector import connect, Error\n",
    "\n",
    "    try:\n",
    "        # The with-blocks release the cursor and connection on exit, so there\n",
    "        # is no explicit close() that could fail when connect() itself raises.\n",
    "        with connect(**DB_CONFIG) as connection:\n",
    "            print(\"CONNECTED!\", connection)\n",
    "            with connection.cursor() as cursor:\n",
    "                cursor.executemany(insert_query, insert_bulk)\n",
    "                connection.commit()\n",
    "            print(\"DONE!\")\n",
    "    except Error as e:\n",
    "        print(e)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}