You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
263 lines
10 KiB
263 lines
10 KiB
{ |
|
"cells": [ |
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Parameterized INSERT for default.TRANSACTION: one %s placeholder per column.\n",
  "# With IGNORE, MySQL skips duplicate-key rows instead of raising an error.\n",
  "insert_query = (\n",
  "    \"INSERT IGNORE INTO default.TRANSACTION \"\n",
  "    \"(ID, TDATE, ACCOUNTID, MEMO, COUNTRY, OUTFLOW, INFLOW, OWNERID, \"\n",
  "    \"INSTALLMENT_NR, INSTALLMENT_TT, CREATED, UPDATED) \"\n",
  "    \"VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )\"\n",
  ")"
 ]
},
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "def find_owner(queried_name: str) -> list | None:\n",
  "    \"\"\"Look up an owner in the OWNER table by name, ignoring case.\n",
  "\n",
  "    Every row of OWNER is fetched and its second column (the name) is\n",
  "    compared in lower case against the lower-cased ``queried_name``.\n",
  "\n",
  "    Args:\n",
  "        queried_name: Owner name to search for.\n",
  "\n",
  "    Returns:\n",
  "        A flat list holding the first two columns (id, name) of every\n",
  "        matching row, or None when nothing matched or the database call\n",
  "        failed (the error is printed).\n",
  "    \"\"\"\n",
  "    from mysql.connector import connect, Error\n",
  "\n",
  "    query = \"SELECT * FROM OWNER\"\n",
  "    wanted = queried_name.lower()\n",
  "\n",
  "    try:\n",
  "        # NOTE(review): credentials are hard-coded; consider moving them to\n",
  "        # configuration.\n",
  "        # The with-blocks close the cursor and the connection on exit, so no\n",
  "        # explicit close() is needed. A `finally: connection.close()` here\n",
  "        # would raise UnboundLocalError whenever connect() itself fails and\n",
  "        # hide the real error.\n",
  "        with connect(\n",
  "            host=\"localhost\",\n",
  "            user=\"root\",\n",
  "            password=\"pleasehashapasswordomg\",\n",
  "            database=\"default\",\n",
  "        ) as connection:\n",
  "            with connection.cursor() as cursor:\n",
  "                cursor.execute(query)\n",
  "                rows = cursor.fetchall()\n",
  "    except Error as e:\n",
  "        print(e)\n",
  "        return None\n",
  "\n",
  "    result = []\n",
  "    for row in rows:\n",
  "        if row[1].lower() == wanted:\n",
  "            result.extend(row[:2])\n",
  "\n",
  "    return result if result else None\n",
  "\n",
  "\n",
  "# Smoke test: display the id of owner \"daniel\" (None when it is not found).\n",
  "owner = find_owner(\"daniel\")\n",
  "owner[0] if owner else None"
 ]
},
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# OUROCARD_VISA_INFINITE-Próxima_Fatura.txt\n",
  "# OUROCARD_VISA_INFINITE-Ago_24.txt\n",
  "def create_lists(path: str = \"OUROCARD_VISA_INFINITE-Ago_24.txt\") -> dict:\n",
  "    \"\"\"Parse a card statement text file into per-owner transaction lists.\n",
  "\n",
  "    A line that starts with the owner pattern (digit, dash, upper-case name)\n",
  "    opens that owner's section; the payment and transaction lines that follow\n",
  "    are collected under it.\n",
  "\n",
  "    Args:\n",
  "        path: Statement file to read, decoded as latin. Defaults to the file\n",
  "            name that used to be hard-coded.\n",
  "\n",
  "    Returns:\n",
  "        A dict keyed by \"list_<owner name in lower case>\". Each value holds\n",
  "        \"owner_name\", \"owner_id\" (first item returned by find_owner), \"tlist\"\n",
  "        (the stripped matching lines) and \"isPartial\" (False once any line in\n",
  "        the dd.mm.yyyy transaction layout was seen, True otherwise).\n",
  "\n",
  "    Raises:\n",
  "        LookupError: find_owner() returned nothing for an owner in the file.\n",
  "    \"\"\"\n",
  "    import re\n",
  "\n",
  "    # Define the regex patterns\n",
  "    owner_pattern = r\"\\d\\s?-\\s?([A-Z]+)\"\n",
  "    line_pattern = r\"\\d{2}\\.\\d{2}\\.\\d{4}.{23}.{14}.{2}\\s*-?\\d*\\.?\\d+,\\d{2}\\s*\\d+,\\d{2}\"\n",
  "    payment_pattern = (r\"\\d{2}\\.\\d{2}\\.\\d{4}PGTO.*200211(\\s*-?\\d*\\.?\\d+,\\d{2})(\\s*\\d+,\\d{2})\")\n",
  "    partial_invoice_line_pattern = r\"\\d{2}\\/\\d{2}.{27}.{16}.{2}\\s+\\s*-?\\d*\\.?\\d+,\\d{2}\\s*\\d+,\\d{2}\"\n",
  "\n",
  "    with open(path, \"r\", encoding=\"latin\") as file:\n",
  "        contents = file.readlines()\n",
  "\n",
  "    result = {}\n",
  "    current_list = None\n",
  "    is_partial = True\n",
  "\n",
  "    for raw_line in contents:\n",
  "        line = raw_line.strip()\n",
  "\n",
  "        header = re.match(owner_pattern, line)\n",
  "        if header:\n",
  "            owner_name = header.group(1)\n",
  "            current_list = f\"list_{owner_name.lower()}\"\n",
  "            # A repeated header keeps appending to the same owner instead of\n",
  "            # discarding the transactions collected so far.\n",
  "            if current_list not in result:\n",
  "                owner = find_owner(owner_name.lower())\n",
  "                if not owner:\n",
  "                    raise LookupError(\n",
  "                        f\"Owner {owner_name!r} could not be resolved by find_owner()\"\n",
  "                    )\n",
  "                result[current_list] = {\n",
  "                    \"owner_name\": owner_name,\n",
  "                    \"owner_id\": owner[0],\n",
  "                    \"tlist\": [],\n",
  "                }\n",
  "            continue\n",
  "\n",
  "        # Lines that appear before the first owner header belong to nobody.\n",
  "        if current_list is None:\n",
  "            continue\n",
  "\n",
  "        if re.match(line_pattern, line):\n",
  "            # A transaction in the dd.mm.yyyy layout means this is not a\n",
  "            # partial invoice (those use dd/mm).\n",
  "            is_partial = False\n",
  "        elif not (\n",
  "            re.match(payment_pattern, line)\n",
  "            or re.match(partial_invoice_line_pattern, line)\n",
  "        ):\n",
  "            continue\n",
  "        result[current_list][\"tlist\"].append(line)\n",
  "\n",
  "    for entry in result.values():\n",
  "        entry[\"isPartial\"] = is_partial\n",
  "\n",
  "    return result"
 ]
},
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "def build_insert(input_dict: dict, account: int) -> list[tuple]:\n",
  "    \"\"\"Turn parsed statement lines into parameter tuples for a bulk insert.\n",
  "\n",
  "    Args:\n",
  "        input_dict: Mapping whose values carry \"isPartial\", \"owner_id\" and\n",
  "            \"tlist\" (raw statement lines). A value without \"tlist\" yields\n",
  "            no rows.\n",
  "        account: Account id stored on every generated row.\n",
  "\n",
  "    Returns:\n",
  "        One tuple per statement line: (id, date, account, memo, country,\n",
  "        outflow, inflow, owner, installment number, installment total,\n",
  "        created, updated). The id is the SHA-256 hex digest of\n",
  "        date + memo + outflow + inflow.\n",
  "\n",
  "    Raises:\n",
  "        ValueError: A non-payment line does not match the layout selected\n",
  "            by \"isPartial\".\n",
  "    \"\"\"\n",
  "    from datetime import date, datetime\n",
  "    import re\n",
  "    import hashlib\n",
  "\n",
  "    # RegEx Patterns\n",
  "    line_group_pattern = r\"(?P<day>\\d{2})\\.(?P<month>\\d{2})\\.(?P<year>\\d{4})(?:(?P<p_memo>.+PARC (?P<p_nr>\\d+.)\\/(?P<p_tt>\\d+)\\s.{12})|(?P<memo>.{37}))(?P<country>.{2})(?P<outflow>\\s*-?\\d*\\.?\\d+,\\d{2})(?P<inflow>\\s*\\d*\\.?\\d+,\\d{2})\"\n",
  "    partial_invoice_group_pattern = r\"(?P<day>\\d{2})\\/(?P<month>\\d{2})(?:(?P<p_memo>.+PARC (?P<p_nr>\\d{2})\\/(?P<p_tt>\\d{2}).{15})|(?P<memo>.{43}))(?P<country>.{2})(?P<outflow>\\s+\\s*-?\\d*\\.?\\d+,\\d{2})(?P<inflow>\\s*\\d+,\\d{2})\"\n",
  "    payment_pattern = r\"(?P<day>\\d{2})\\.(?P<month>\\d{2})\\.(?P<year>\\d{4})(?P<memo>PGTO DEBITO CONTA).*200211(?P<inflow>\\s*-?\\d*\\.?\\d+,\\d{2})(?P<outflow>\\s*\\d+,\\d{2})\"\n",
  "\n",
  "    def to_amount(raw: str) -> str:\n",
  "        # Statement amounts look like \"1.234,56\"; store them as \"1234.56\".\n",
  "        return raw.strip().replace(\".\", \"\").replace(\",\", \".\")\n",
  "\n",
  "    insert_bulk = []\n",
  "\n",
  "    for entry in input_dict.values():\n",
  "        is_partial = entry[\"isPartial\"]\n",
  "        pattern_to_use = partial_invoice_group_pattern if is_partial else line_group_pattern\n",
  "\n",
  "        for item in entry.get(\"tlist\", []):\n",
  "            # check for payment\n",
  "            payment = re.match(payment_pattern, item)\n",
  "            if payment:\n",
  "                tTdate = str(\n",
  "                    date(\n",
  "                        int(payment.group(\"year\")),\n",
  "                        int(payment.group(\"month\")),\n",
  "                        int(payment.group(\"day\")),\n",
  "                    )\n",
  "                )\n",
  "                tMemo = payment.group(\"memo\")\n",
  "                tCountry = None\n",
  "                tOutflow = \"0.00\"\n",
  "                # Payments are stored as an unsigned inflow, so drop the sign.\n",
  "                tInflow = to_amount(payment.group(\"inflow\")).replace(\"-\", \"\")\n",
  "                tInstallmentNr = None\n",
  "                tInstallmentTt = None\n",
  "            else:\n",
  "                matches = re.match(pattern_to_use, item)\n",
  "                if matches is None:\n",
  "                    raise ValueError(f\"Unparseable transaction line: {item!r}\")\n",
  "\n",
  "                # partial files will not have the year data on transactions\n",
  "                # NOTE(review): the current year mis-dates lines that belong\n",
  "                # to a previous year - confirm this is acceptable.\n",
  "                year = datetime.now().year if is_partial else int(matches.group(\"year\"))\n",
  "                tTdate = str(\n",
  "                    date(\n",
  "                        year,\n",
  "                        int(matches.group(\"month\")),\n",
  "                        int(matches.group(\"day\")),\n",
  "                    )\n",
  "                )\n",
  "                tMemo = matches.group(\"p_memo\") or matches.group(\"memo\")\n",
  "                tInstallmentNr = int(matches.group(\"p_nr\")) if matches.group(\"p_nr\") else None\n",
  "                tInstallmentTt = int(matches.group(\"p_tt\")) if matches.group(\"p_tt\") else None\n",
  "                tCountry = matches.group(\"country\")\n",
  "                tOutflow = to_amount(matches.group(\"outflow\"))\n",
  "                tInflow = to_amount(matches.group(\"inflow\"))\n",
  "\n",
  "            tCreated = str(datetime.now(tz=None))\n",
  "\n",
  "            # Deterministic id: the same statement line always hashes to the\n",
  "            # same value (presumably used as the table key to skip re-imports).\n",
  "            preHash = tTdate + tMemo + tOutflow + tInflow\n",
  "            tId = hashlib.sha256(preHash.encode()).hexdigest()\n",
  "\n",
  "            insert_bulk.append(\n",
  "                (\n",
  "                    tId,\n",
  "                    tTdate,\n",
  "                    account,\n",
  "                    tMemo,\n",
  "                    tCountry,\n",
  "                    tOutflow,\n",
  "                    tInflow,\n",
  "                    entry[\"owner_id\"],\n",
  "                    tInstallmentNr,\n",
  "                    tInstallmentTt,\n",
  "                    tCreated,\n",
  "                    None,  # UPDATED stays empty on insert\n",
  "                )\n",
  "            )\n",
  "\n",
  "    return insert_bulk"
 ]
},
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "def db_insert(insert_bulk: list[tuple]) -> None:\n",
  "    \"\"\"Bulk-insert the prepared rows using the notebook-level insert_query.\n",
  "\n",
  "    Args:\n",
  "        insert_bulk: Parameter tuples, one per row, in the column order\n",
  "            expected by insert_query.\n",
  "\n",
  "    Database errors are printed, not raised.\n",
  "    \"\"\"\n",
  "    from mysql.connector import connect, Error\n",
  "\n",
  "    try:\n",
  "        # NOTE(review): credentials are hard-coded; consider moving them to\n",
  "        # configuration.\n",
  "        # The with-blocks close the cursor and the connection on exit, so no\n",
  "        # explicit close() is needed. A `finally: connection.close()` here\n",
  "        # would raise UnboundLocalError whenever connect() itself fails and\n",
  "        # hide the real error.\n",
  "        with connect(\n",
  "            host=\"localhost\",\n",
  "            user=\"root\",\n",
  "            password=\"pleasehashapasswordomg\",\n",
  "            database=\"default\",\n",
  "        ) as connection:\n",
  "            print(\"CONNECTED!\", connection)\n",
  "            with connection.cursor() as cursor:\n",
  "                cursor.executemany(insert_query, insert_bulk)\n",
  "                connection.commit()\n",
  "                print(\"DONE!\")\n",
  "    except Error as e:\n",
  "        print(e)"
 ]
}
|
], |
|
"metadata": { |
|
"kernelspec": { |
|
"display_name": "Python 3", |
|
"language": "python", |
|
"name": "python3" |
|
}, |
|
"language_info": { |
|
"name": "python", |
|
"version": "3.12.3" |
|
} |
|
}, |
|
"nbformat": 4, |
|
"nbformat_minor": 2 |
|
}
|
|
|