Browse Source

updates

master
Yutsuo 1 year ago
parent
commit
db15f0c46b
  1. 180
      OUROCARD_VISA_INFINITE-Próxima_Fatura(1).txt
  2. 99
      etl.py
  3. 1564
      pdfScrape.ipynb
  4. 23
      robopato.sql

180
OUROCARD_VISA_INFINITE-Próxima_Fatura(1).txt

@ -0,0 +1,180 @@
SISBB - Sistema de Informações Banco do Brasil
02/09/2024 Auto-Atendimento 12:52:13
Fatura do Cartão de Crédito
Cliente : DANIEL O CARVALHO
Nr.Cartão : 4984.****.****.5727
Modalidade : OUROCARD VISA INFINITE
--------------------------------------------------------------------------------
L A N Ç A M E N T O S F U T U R O S
--------------------------------------------------------------------------------
DEMONSTRATIVO
--------------------------------------------------------------------------------
Data Transações País Valor R$ Valor US$
--------------------------------------------------------------------------------
1-DANIEL O C
SALDO FATURA ANTERIOR BR 19.634,96 0,00
Pagamentos
26/08 PGTO DEBITO CONTA 8611 000006025 200 211 -19.634,96 0,00
Compras a vista
Compras Diversas
13/08 DL*GOOGLE YouTub SAO PAULO BR 41,90 0,00
12/08 STEAM PURCHASE SEATTLE DE 24,00 0,00
13/08 STEAMGAMES.COM 42595229912-1844160 WA 24,00 0,00
15/08 VELOE BARUERI BR 22,26 0,00
14/08 IFD*Lucas Tomaz Da SilvOsasco BR 5,00 0,00
16/08 UBER* TRIP WWW.UBER.COM. BR 30,98 0,00
16/08 UBER* TRIP WWW.UBER.COM. BR 5,00 0,00
16/08 UBER* TRIP WWW.UBER.COM. BR 26,59 0,00
16/08 UBER* TRIP WWW.UBER.COM. BR 5,00 0,00
16/08 DROGARIA SAO PAULO BRASILIA BR 1.219,44 0,00
16/08 IFD*Josue Cardoso De SoOsasco BR 10,00 0,00
18/08 NETFLIX ENTRETENIMENTO BARUERI BR 44,90 0,00
18/08 IFD*Pedro Henrique BarbOsasco BR 10,00 0,00
20/08 PAG*FolhaDeSPaulo SAO PAULO BR 29,90 0,00
20/08 MERCADOLIVRE*ROMEROSHOPOSASCO BR 125,89 0,00
21/08 CANTINA E CIA BRASILIA BR 8,50 0,00
21/08 TRACKEFIELD BRASILIA BR 1.259,20 0,00
21/08 MULTIPLAN BRASILIA BR 25,00 0,00
22/08 PAG*XsollaGames Sao Paulo BR 26,99 0,00
22/08 CANTINA E CIA BRASILIA BR 19,00 0,00
22/08 CINEMARK BRASIL SAO PAULO BR 18,24 0,00
22/08 PAGUE MENOS 1225 BRASILIA BR 52,97 0,00
23/08 MERCADOLIVRE*SOLDIERSNUOSASCO BR 288,70 0,00
22/08 MR JOHN BARBEARIA LTDA BRASILIA BR 60,00 0,00
24/08 IFD*Patrick Alves NunesOsasco BR 10,00 0,00
26/08 MP*MELIMAIS OSASCO BR 17,99 0,00
25/08 IFD*Italo Monteiro MaiaOsasco BR 5,00 0,00
26/08 DROGARIA SAO PAULO BRASILIA BR 92,87 0,00
Restaurantes
14/08 WINE GARDEN BRASILIA BR 261,51 0,00
14/08 IFD*KATSU HMK BAR E RESBRASILIA BR 65,99 0,00
16/08 IFD*TACO PEP RESTAURANTBRASILIA BR 232,90 0,00
18/08 IFD*IFOOD.COM AGENCIA DOsasco BR 12,90 0,00
18/08 IFD*H.L.F HAMBURGUERIA BRASILIA BR 159,59 0,00
23/08 IFD*TT BRASILIA COMERCIBRASILIA BR 102,00 0,00
23/08 T.T. BURGER BRASILIA BR 18,00 0,00
25/08 RESTAURANTE 61 ASA SUL BRASILIA BR 101,74 0,00
24/08 IFD*MIPA CULINARIA CONSBRASILIA BR 82,99 0,00
25/08 IFD*NFE COMERCIO DE ALIBRASILIA BR 80,79 0,00
Hospitais
16/08 LISTO *CLINICAESTETIC BRASILIA BR 1.330,00 0,00
Compras por mala direta/telefone/web
27/08 Wellhub Gympass BR GympSao Paulo BR 399,90 0,00
Compras/Pagamento de contas parceladas
Compras Diversas
11/04 PRODUTOS GLOB PARC 05/12 RIO DE JANEI BR 44,90 0,00
15/01 MP*MUNDODOSCO PARC 08/10 SAO PAULO BR 159,90 0,00
17/05 PAG*Folhadesp PARC 04/06 Sao Paulo BR 109,60 0,00
27/05 PARC=112 BRAS PARC 04/12 BRASILIA BR 452,00 0,00
Débitos diversos
14/08 IOF - COMPRA NO EXTERIOR 0,26 0,00
14/08 IOF - COMPRA NO EXTERIOR 0,26 0,00
SubTotal 7.124,55 0,00
4-IZABELY C
Compras a vista
Compras Diversas
12/08 UBER* TRIP WWW.UBER.COM. BR 6,90 0,00
14/08 BrasilC*SHOPPING ENXOV Brasilia BR 319,96 0,00
15/08 UBER* ONE OSASCO BR 19,90 0,00
14/08 UBER * PENDING SAO PAULO BR 37,14 0,00
18/08 DROGARIO ROSARIO BRASILIA BR 252,05 0,00
20/08 DiogoLealPimenta BRASILIA BR 86,00 0,00
20/08 DROGASIL 2067 BRASILIA BR 226,96 0,00
20/08 IFD*O PUDIM PERFEITO FABRASILIA BR 89,90 0,00
21/08 UBER* TRIP WWW.UBER.COM. BR 6,18 0,00
21/08 MERCADOLIVRE*3PRODUTOS OSASCO BR 113,80 0,00
21/08 MERCADOLIVRE*3PRODUTOS OSASCO BR 65,96 0,00
22/08 UBER* TRIP WWW.UBER.COM. BR 39,91 0,00
21/08 Nestle Brasil LTDA. Sao Paulo BR 230,00 0,00
22/08 UBER * PENDING SAO PAULO BR 5,81 0,00
23/08 UBER* TRIP OSASCO BR 8,93 0,00
24/08 LOUNGERIE BRASILIA BR 207,60 0,00
24/08 GEORGE ROBERTO DE FRA BRASILIA BR 238,00 0,00
24/08 UBER* TRIP OSASCO BR 15,00 0,00
24/08 UBER* TRIP OSASCO BR 26,37 0,00
25/08 UBER * PENDING SAO PAULO BR 7,12 0,00
28/08 UBER * PENDING SAO PAULO BR 8,91 0,00
29/08 UBER* TRIP OSASCO BR 8,91 0,00
29/08 MERCADOLIVRE*SANTACOLOMOSASCO BR 379,90 0,00
Restaurantes
13/08 RESTAURANTE FAROFINA BRASILIA BR 96,80 0,00
14/08 LE VIN BRASILIA BR 160,47 0,00
14/08 Bacio di Latte-LJ3080 Brasilia BR 20,95 0,00
16/08 FRAN S CAFE BRASILIA BR 20,40 0,00
17/08 CAPPUCCINO BRASILIA BR 211,31 0,00
18/08 CASA ALMERIA BRASILIA BR 231,72 0,00
23/08 RESTAURANTE FAROFINA BRASILIA BR 79,90 0,00
25/08 JERONIMO FLAMBOYANT BG GOIANIA BR 98,00 0,00
26/08 Bianco GOIANIA BR 102,54 0,00
25/08 IFD*MELO GONCALVES LTDAGOIANIA BR 118,00 0,00
28/08 Frigideira BRASILIA BR 18,00 0,00
29/08 Frigideira BRASILIA BR 18,00 0,00
Compras/Pagamento de contas parceladas
Compras Diversas
17/01 BRASILIA EMPR PARC 08/12 BRASILIA BR 599,00 0,00
SubTotal 4.176,30 0,00
Total 11.300,85 0,00

99
etl.py

@ -1,9 +1,7 @@
def read_cc_full_invoice():
import re
from datetime import date, datetime
import locale
insert_query ="INSERT INTO default.TRANSACTION (TDATE, ACCOUNTID, MEMO, CITY, COUNTRY, OUTFLOW, INFLOW, OWNERID, INSTALLMENT_NR, INSTALLMENT_TT, CREATED, UPDATED) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )"
locale.setlocale(locale.LC_ALL, 'pt_BR.UTF-8')
def create_lists():
import re
# Open the text file
with open('OUROCARD_VISA_INFINITE-Ago_24.txt', 'r', encoding='latin') as file:
@ -14,64 +12,81 @@ def read_cc_full_invoice():
dan_pattern = r'1 - DANIEL.*'
iza_pattern = r'4 - IZABELY.*'
line_pattern = r'\d{2}\.\d{2}\.\d{4}.{23}.{14}.{2}\s*\d+,\d{2}\s*\d+,\d{2}'
line_group_pattern = r'(\d{2})\.(\d{2})\.(\d{4})(.{23})(.{14})(.{2})(\s*\d+,\d{2})(\s*\d+,\d{2})'
# Lists
list_dan = []
list_iza = []
current_list = None
insert_bulk = []
# Iterate all lines
for line in contents:
line = line.strip()
if re.match(dan_pattern, line):
current_list = 'list_dan'
print('found Dan')
elif re.match(iza_pattern, line):
current_list = 'list_iza'
print('found Iza')
else:
if re.match(line_pattern, line):
if current_list == 'list_dan':
print("dan", line)
list_dan.append(line)
if current_list == 'list_iza':
print("iza", line)
list_iza.append(line)
return [list_dan, list_iza]
def build_insert(lists: list[list[str]], account: int, owner: int) -> list[tuple]:
    """Convert matched invoice lines into parameter tuples for the TRANSACTION insert.

    Each line is expected to start with a ``dd.mm.yyyy`` date followed by either
    an instalment memo (``... PARC nn/tt`` plus a 13-character city field) or a
    23-character memo and a 14-character city, then a 2-character country code
    and two comma-decimal amounts.

    Args:
        lists: Batches of invoice lines; every line of every batch is converted.
        account: Value stored in the ACCOUNTID position of every tuple.
        owner: Value stored in the OWNERID position of every tuple.

    Returns:
        One 12-tuple per line, ordered as (TDATE, ACCOUNTID, MEMO, CITY,
        COUNTRY, OUTFLOW, INFLOW, OWNERID, INSTALLMENT_NR, INSTALLMENT_TT,
        CREATED, UPDATED).

    Raises:
        ValueError: If a line does not match the expected layout. Failing
            loudly avoids silently dropping a transaction from the load.

    NOTE(review): the same ``owner`` is applied to every batch, so lines of
    different cardholders passed in one call all get the same OWNERID --
    confirm this is intended.
    """
    import re
    from datetime import date, datetime

    # Amounts may carry thousands separators ("1.219,44"), so dots are allowed
    # between the leading digit and the decimal comma.
    amount_group = r'(\s*\d[\d.]*,\d{2})'
    # Group numbers: 1-3 date, 5-8 instalment memo/nr/total/city,
    # 9-10 plain memo/city, 11 country, 12 outflow, 13 inflow.
    line_group_pattern = re.compile(
        r'(\d{2})\.(\d{2})\.(\d{4})'
        r'((.+PARC (\d+)\/(\d+))(\s.{12})|(.{23})(.{14}))'
        r'(.{2})' + amount_group + amount_group
    )

    def to_decimal_text(raw: str) -> str:
        # "1.219,44" -> "1219.44": drop thousands dots, then use a decimal point.
        return raw.strip().replace('.', '').replace(',', '.')

    insert_bulk = []
    for batch in lists:
        for item in batch:
            match = line_group_pattern.search(item)
            if match is None:
                raise ValueError(
                    f'invoice line does not match the expected layout: {item!r}'
                )

            day, month, year = (int(match.group(i)) for i in (1, 2, 3))
            tTdate = str(date(year, month, day))

            # Group 5 only participates when the instalment alternative matched.
            if match.group(5):
                tMemo = match.group(5)
                tCity = match.group(8)
                tInstallmentNr = int(match.group(6))
                tInstallmentTt = int(match.group(7))
            else:
                tMemo = match.group(9)
                tCity = match.group(10)
                tInstallmentNr = 1
                tInstallmentTt = None

            tCountry = match.group(11)
            tOutflow = to_decimal_text(match.group(12))
            tInflow = to_decimal_text(match.group(13))
            tCreated = str(datetime.now(tz=None))
            tUpdated = None

            insert_bulk.append((
                tTdate, account, tMemo, tCity, tCountry, tOutflow, tInflow,
                owner, tInstallmentNr, tInstallmentTt, tCreated, tUpdated,
            ))
    return insert_bulk
def db_insert(insert_bulk: list[tuple]):
    """Insert the prepared rows into the database in a single batch.

    Opens a MySQL connection, runs the module-level ``insert_query`` once per
    tuple via ``executemany`` and commits. Connector errors are printed rather
    than raised, matching the script's existing best-effort reporting.

    Args:
        insert_bulk: Parameter tuples, one per row, in the column order
            expected by ``insert_query``.
    """
    from mysql.connector import connect, Error

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a config file kept out of version control.
    try:
        # The `with` statement owns the connection and releases it on exit,
        # so no explicit close() is needed. The previous `finally:
        # connection.close()` raised UnboundLocalError whenever connect()
        # itself failed, hiding the error reported below.
        with connect(
            host='localhost',
            user='root',
            password='pleasehashapasswordomg',
            database='default'
        ) as connection:
            print("CONNECTED!", connection)
            with connection.cursor() as cursor:
                cursor.executemany(insert_query, insert_bulk)
                connection.commit()
                print("DONE!")
    except Error as e:
        print(e)
# Run the whole pipeline: collect the matched invoice lines, build the insert
# tuples with account 1 and owner 1, then write them to the database.
# NOTE(review): create_lists() returns two lists (Dan's and Iza's lines) and
# build_insert() applies the single owner argument to both -- confirm that
# owner 1 is intended for the second list as well.
db_insert(build_insert(create_lists(), 1, 1))

1564
pdfScrape.ipynb

File diff suppressed because it is too large Load Diff

23
robopato.sql

@ -1,48 +1,49 @@
CREATE TABLE `ACCOUNTS` (
`ID` integer PRIMARY KEY AUTO_INCREMENT,
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`NAME` varchar(20),
`CREATED` datetime,
`UPDATED` datetime
);
CREATE TABLE `TRANSACTION` (
`ID` integer PRIMARY KEY AUTO_INCREMENT,
`DATE` date,
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`TDATE` date,
`ACCOUNTID` integer,
`DESC` varchar(23),
`CITY` varchar(14),
`MEMO` varchar(30),
`CITY` varchar(20),
`COUNTRY` char(2),
`OUTFLOW` decimal(20,2),
`INFLOW` decimal(12,2),
`OWNERID` integer,
`INSTALLMENTS` integer DEFAULT 1,
`INSTALLMENT_NR` integer,
`INSTALLMENT_TT` integer,
`CREATED` datetime,
`UPDATED` datetime
);
CREATE TABLE `PAYEE` (
`ID` integer PRIMARY KEY AUTO_INCREMENT,
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`NAME` varchar(20),
`CREATED` datetime,
`UPDATED` datetime
);
CREATE TABLE `OWNER` (
`ID` integer PRIMARY KEY AUTO_INCREMENT,
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`NAME` varchar(20),
`CREATED` datetime,
`UPDATED` datetime
);
CREATE TABLE `CATEGORY` (
`ID` integer PRIMARY KEY AUTO_INCREMENT,
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`NAME` varchar(20),
`CREATED` datetime,
`UPDATED` datetime
);
CREATE TABLE `SUBCATEGORY` (
`ID` integer PRIMARY KEY AUTO_INCREMENT,
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`CATEGORYID` integer,
`NAME` varchar(20),
`CREATED` datetime,
@ -50,7 +51,7 @@ CREATE TABLE `SUBCATEGORY` (
);
CREATE TABLE `CATEGORIZED_TRANSACTIONS` (
`ID` integer PRIMARY KEY AUTO_INCREMENT,
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`ACCOUNTID` integer,
`TRANSACTIONID` integer,
`PAYEEID` integer,

Loading…
Cancel
Save