Używanie regex do zastępowania danych pliku

Dzięki pomocy uzyskanej tutaj mój kod działa prawie dokładnie tak, jak chcę. Teraz potrzebuję jeszcze możliwości usunięcia części danych z pliku, zanim pliki zostaną porównane.

Powodem jest to, że ciągi znaków („dane”), które chcę usunąć, różnią się za każdym razem, gdy plik jest zapisywany.

Napisałem wyrażenie regularne, aby wybrać dokładny tekst, który chcę usunąć, ale mam problemy z zaimplementowaniem go przy użyciu mojego obecnego kodu.

Oto trzy główne funkcje

HOSTNAME_RE = re.compile(r'hostname +(\S+)')


def get_file_info_from_lines(filename, file_lines):
    """Hash every line and pick out the hostname declared in the file.

    Each line is UTF-8 encoded and fed to a SHA-1 digest in order. A line is
    treated as a hostname declaration when it starts with ``hostname``
    followed by one or more spaces; if several lines qualify, the last one
    wins.

    Returns a ``(hostname, filename, sha1_hexdigest)`` tuple, where
    ``hostname`` is ``None`` when no declaration was found.
    """
    digest = hashlib.sha1()
    found_name = None
    for text in file_lines:
        digest.update(text.encode('utf-8'))
        hit = HOSTNAME_RE.match(text)
        if hit is not None:
            found_name = hit.group(1)
    return found_name, filename, digest.hexdigest()

def get_file_info(filename):
    """Return file info for a configuration file, or ``None`` for other files.

    Only names ending in ``.cfg``, ``.startup`` or ``.confg`` are read. For
    those, the result is whatever ``get_file_info_from_lines`` returns for the
    file's lines.
    """
    if not filename.endswith(('.cfg', '.startup', '.confg')):
        return None
    # Open read-only: the file is never written here, and "r+" would raise on
    # files the current user is not allowed to modify.
    with open(filename, "r") as in_file:
        # NOTE: an earlier attempt ran re.sub(REMOVE_RE, subst, filename, ...)
        # at this point; that rewrites the path string, not the file content.
        return get_file_info_from_lines(filename, in_file.readlines())

def hostname_parse(directory):
    """Collect file info for every entry of ``directory``, keyed by hostname.

    Each entry is joined with ``directory`` and passed to ``get_file_info``;
    entries for which it returns ``None`` are skipped. When two files report
    the same hostname, the one listed later replaces the earlier one.

    Returns a dict mapping ``info[0]`` to the full ``info`` tuple.
    """
    results = {}
    # List the directory once so the progress total always matches the
    # entries that are actually iterated.
    entries = os.listdir(directory)
    total = len(entries)
    for count, entry in enumerate(entries, start=1):
        path = os.path.join(directory, entry)
        sleep(0.001)
        # progress_bar is defined outside this snippet; presumably it renders
        # a console progress indicator -- confirm against its definition.
        progress_bar(count, total, prefix='Progress:', suffix='Complete', barLength=50)
        info = get_file_info(path)
        if info is not None:
            results[info[0]] = info
    return results

To jest wyrażenie regularne znajdujące ciągi znaków, które należy usunąć.

# Pattern for the span to drop: the text "Current configuration" followed by
# up to six repetitions of `.*\n?` -- the remainder of that line and, when the
# pattern is run against multi-line text, up to five further lines.
REMOVE_RE = r"((?:\bCurrent configuration)(?:.*\n?){6})" 
# Replacement text; empty, so a substitution deletes whatever was matched.
subst = ""

EXAMPLE_FILE_BEFORE_DATA_REMOVED:

Building configuration... 

Current configuration : 45617 bytes 
! 
! Last configuration change at 00:22:36 UTC Sun Jan 22 2017 by user 
! NVRAM config last updated at 00:22:43 UTC Sun Jan 22 2017 by user 
! 
version 15.0 
no service pad 
! 
no logging console 
enable secret 5 ***encrypted password*** 
! 
username admin privilege 15 password 7 ***encrypted password*** 
username sadmin privilege 15 secret 5 ***encrypted password*** 
aaa new-model 
! 
ip ftp username ***encrypted password*** 
ip ftp password 7 ***encrypted password*** 
ip ssh version 2 
! 
line con 0 
password 7 ***encrypted password*** 
login authentication maint 
line vty 0 4 
password 7 ***encrypted password*** 
length 0 
transport input ssh 
line vty 5 15 
password 7 ***encrypted password*** 
transport input ssh 
!

EXAMPLE_FILE_AFTER_DATA_REMOVED:

Building configuration... 

! 
no service pad 
! 
no logging console 
enable 
! 
username admin privilege 15 
username gisadmin privilege 15 
aaa new-model 
! 
ip ftp username cfgftp 
ip ftp 
ip ssh version 2 
! 
line con 0 

login authentication maint 
line vty 0 4 

length 0 
transport input ssh 
line vty 5 15 

transport input ssh 
!

Próbowałem czegoś w rodzaju `#filename = re.sub(REMOVE_RE, subst, filename, 0, re.MULTILINE)` wewnątrz get_file_info i get_file_info_from_lines, ale najwyraźniej nie implementuję tego poprawnie.

Każda pomoc będzie doceniona, ponieważ dopiero się uczę.

uruchamiając Porównaj:

results1 = hostname_parse('test1.txt')
results2 = hostname_parse('test2.txt')

# Report every hostname present in both result sets whose hashes differ.
for hostname, filename, filehash in results1.values():
    if hostname not in results2:
        continue
    _, filename2, filehash2 = results2[hostname]
    if filehash == filehash2:
        continue
    print("%s has a change (%s, %s)" % (hostname, filehash, filehash2))
    print(filename)
    print(filename2)
    print()

Nie chcę modyfikować bieżącego pliku. Byłoby świetnie, gdyby wszystko to dało się zrobić w pamięci lub w pliku tymczasowym.

pełny kod:

import hashlib 
import os 
import re 


HOSTNAME_RE = re.compile(r'hostname +(\S+)')
REMOVE_RE = re.compile(r"((?:\bCurrent configuration)(?:.*\n?){6})")


def get_file_info_from_lines(filename, file_lines):
    """Hash a file's lines while ignoring the "Current configuration" header.

    The lines are joined, every span matched by ``REMOVE_RE`` is deleted, and
    the SHA-1 of the remaining text (UTF-8 encoded) is computed. Nothing is
    written back to disk; all work happens in memory.

    The hostname is taken from the last line that starts with ``hostname``
    followed by one or more spaces, or is ``None`` when there is no such line.

    Returns a ``(hostname, filename, sha1_hexdigest)`` tuple.
    """
    lines = list(file_lines)  # accept any iterable; it is traversed twice

    hostname = None
    for line in lines:
        match = HOSTNAME_RE.match(line)
        if match:
            hostname = match.group(1)

    # REMOVE_RE spans several lines, so it has to run against the joined
    # text. Tested one line at a time it can only ever drop the single
    # "Current configuration" line, leaving the lines after it in the hash.
    stable_text = REMOVE_RE.sub('', ''.join(lines))
    a_hash = hashlib.sha1(stable_text.encode('utf-8'))
    return hostname, filename, a_hash.hexdigest()

def get_file_info(filename):
    """Return file info for a configuration file, or ``None`` for other files.

    Only names ending in ``.cfg``, ``.startup`` or ``.confg`` are read. For
    those, the result is whatever ``get_file_info_from_lines`` returns for the
    file's lines.
    """
    if not filename.endswith(('.cfg', '.startup', '.confg')):
        return None
    # Open read-only: the file is never written here, and "r+" would raise on
    # files the current user is not allowed to modify.
    with open(filename, "r") as in_file:
        return get_file_info_from_lines(filename, in_file.readlines())

def hostname_parse(directory):
    """Build a hostname -> file-info mapping for the entries of ``directory``.

    Every directory entry is joined with ``directory`` and handed to
    ``get_file_info``. Entries that yield ``None`` are ignored; a later entry
    with the same hostname overwrites an earlier one.
    """
    by_hostname = {}
    for entry in os.listdir(directory):
        info = get_file_info(os.path.join(directory, entry))
        if info is None:
            continue
        by_hostname[info[0]] = info
    return by_hostname


results1 = hostname_parse('test1')  # directory of test files
results2 = hostname_parse('test2')  # second directory of test files

# Report every hostname present in both result sets whose hashes differ.
for hostname, filename, filehash in results1.values():
    if hostname not in results2:
        continue
    _, filename2, filehash2 = results2[hostname]
    if filehash == filehash2:
        continue
    print("%s has a change (%s, %s)" % (hostname, filehash, filehash2))
    print(filename)
    print(filename2)
    print()

Źródło

2017-01-31 NineTail

Każda pomoc? Nadal nie jestem w stanie sprawić, żeby to działało poprawnie. – NineTail

Czy możesz dodać pełny kod, abyśmy wiedzieli, które biblioteki są tu używane? Nie udało mi się uruchomić Twojego kodu! Przepraszam –

@ Md.SifatulIslam Zrobione. – NineTail

Udało mi się znaleźć sposób na obejście wyrażenia regularnego. Po prostu pomijam linie, rozpoznając je po fragmencie tekstu.

def get_file_info_from_lines(filename, file_lines):
    """Hash the lines of a file, skipping the "Last"/"NVRAM" comment lines.

    Any line containing ``"! Last "`` or ``"! NVRAM "`` is left out of the
    SHA-1 digest; per the question, the content of those lines differs
    between saves. The hostname is taken from the last remaining line that
    starts with ``hostname`` followed by one or more spaces.

    Returns a ``(hostname, filename, sha1_hexdigest)`` tuple; ``hostname`` is
    ``None`` when no hostname line was seen.
    """
    volatile_markers = ("! Last ", "! NVRAM ")
    hostname = None
    a_hash = hashlib.sha1()
    for line in file_lines:
        # Skipping is equivalent to hashing an empty string for this line.
        if any(marker in line for marker in volatile_markers):
            continue
        a_hash.update(line.encode('utf-8'))
        match = HOSTNAME_RE.match(line)
        if match:
            hostname = match.group(1)
    # Without this return the function yields None, and a caller that skips
    # None results would silently drop every file.
    return hostname, filename, a_hash.hexdigest()

Źródło

2017-02-24 17:21:55 NineTail

W get_file_info_from_lines po prostu zignoruj linię, jeśli pasuje do wyrażenia regularnego. W ten sposób nie trzeba modyfikować pliku ani tworzyć innego pliku — wystarczy obliczyć skrót z linii, które mają znaczenie.

# Feed only the lines that do not match REMOVE_RE into the running hash.
# NOTE(review): re.match() anchors at the start of the string and each line is
# tested on its own (assuming file_lines holds one line per item, e.g. from
# readlines()), so only a line that itself begins with "Current configuration"
# is skipped; the lines that follow it are still hashed.
for line in file_lines: 
    if not re.match(REMOVE_RE, line): 
     a_hash.update(line.encode('utf-8'))

Źródło

2017-02-17 17:35:48 lufte

Wygląda na to, że powinno to działać, ale wciąż zgłasza różnicę. Te dwa pliki są identyczne, z wyjątkiem części zaczynającej się od „Current configuration”. Do testów dodaję lub usuwam tam kilka znaków. – NineTail

Po prostu wykonałem szybki test na wypadek, gdy coś przeoczyłem, ale działa dobrze. Z twoim kodem musi być coś jeszcze nie tak. – lufte

Cóż, mój kod jest taki sam jak powyżej z samą zmianą, którą wprowadziłeś i nadal zwraca, że pliki są różne. Czy zmieniłeś linie w bieżącej konfiguracji? Jak zmienić "przez użytkownika" na coś innego? Sun Jan 22 2017 by user1234 na przykład w jednym pliku, ale nie w drugim? – NineTail

Cześć. Proponuję następujące podejście: użyj funkcji do czyszczenia pojedynczych linii, a następnie przetwórz listę linii, aby usunąć puste.

Do porównania użyj modułu difflib. Aby sprawdzić doctesty, uruchom `python -m doctest file.py`.

import re 
source_content = """ 
Building configuration... 

Current configuration : 45617 bytes 
! 
! Last configuration change at 00:22:36 UTC Sun Jan 22 2017 by user 
! NVRAM config last updated at 00:22:43 UTC Sun Jan 22 2017 by user 
! 
version 15.0 
no service pad 
! 
no logging console 
enable secret 5 ***encrypted password*** 
! 
username admin privilege 15 password 7 ***encrypted password*** 
username sadmin privilege 15 secret 5 ***encrypted password*** 
aaa new-model 
! 
ip ftp username ***encrypted password*** 
ip ftp password 7 ***encrypted password*** 
ip ssh version 2 
! 
line con 0 
password 7 ***encrypted password*** 
login authentication maint 
line vty 0 4 
password 7 ***encrypted password*** 
length 0 
transport input ssh 
line vty 5 15 
password 7 ***encrypted password*** 
transport input ssh 
! 
""" 

target_content = """ 
Building configuration... 

! 
no service pad 
! 
no logging console 
enable 
! 
username admin privilege 15 
username gisadmin privilege 15 
aaa new-model 
! 
ip ftp username cfgftp 
ip ftp 
ip ssh version 2 
! 
line con 0 

login authentication maint 
line vty 0 4 

length 0 
transport input ssh 
line vty 5 15 

transport input ssh 
! 
""" 



HOSTNAME_RE = re.compile(r'hostname +(\S+)')
REMOVE_RE = re.compile(r"((?:\bCurrent configuration)(?:.*\n?){6})")


def process_line(line):
    """Normalise one configuration line before comparison.

    * a line starting with ``!`` collapses to a bare ``'!'``;
    * a line starting with ``hostname <name>`` is reduced to ``<name>``;
    * a line starting with ``Current configuration`` becomes ``''``;
    * any other line is returned unchanged.

    >>> process_line('! rgrg')
    '!'
    >>> process_line('hostname router1')
    'router1'
    >>> process_line('Current configuration : 45617 bytes')
    ''
    >>> process_line('username admin privilege 15 password 7 ***encrypted password***')
    'username admin privilege 15 password 7 ***encrypted password***'
    """
    if line.startswith('!'):
        return '!'
    # Bind the match object so its captured group can be read; calling
    # .group() on an unbound name raises NameError on every hostname line.
    match = HOSTNAME_RE.match(line)
    if match:
        return match.group(1)
    if REMOVE_RE.match(line):
        return ''
    return line

source_lines = source_content.split('\n')

# Debug aid: show each processed line exactly as it will be joined below.
for raw in source_lines:
    print(repr(process_line(raw).strip()))

# Processed, whitespace-trimmed lines stitched back into a single text.
whitened = '\n'.join(process_line(raw).strip() for raw in source_lines)

def clean_lines(lines, flag=''):
    """Collapse every run of consecutive ``flag`` lines into a single line.

    A line counts as a flag line when it equals ``flag`` after newline
    characters are stripped from both ends. The first line of each run is
    kept as-is; the remaining lines of the run are dropped. All other lines
    are kept unchanged, in order. With the default ``flag=''`` this squeezes
    runs of blank lines.

    Returns a new list; ``lines`` is not modified.
    """
    kept = []
    previous_was_flag = False
    for text in lines:
        is_flag = text.strip('\n') == flag
        if not (is_flag and previous_was_flag):
            kept.append(text)
        previous_was_flag = is_flag
    return kept

print('^^^^^^^^^^^^^^')
# First pass: squeeze each run of consecutive '!' lines into one.
bang_collapsed = clean_lines(whitened.split('\n'), flag='!')
no_exc = '\n'.join(bang_collapsed)
print(no_exc)
print('##############')
# Second pass: squeeze runs of empty lines (the default flag is '').
blank_collapsed = clean_lines(no_exc.split('\n'))
no_sp = '\n'.join(blank_collapsed)
print(no_sp)

Źródło

2017-02-24 08:40:00 cgte

To jest najbliższe temu, co chcę osiągnąć, czyli usunięciu linii, których nie chcę uwzględniać. Wygląda na to, że muszę przemyśleć całe podejście od nowa. – NineTail

Nie ma problemu. Popełniłeś typowe dla początkujących błędy w architekturze. Zdarza się to każdemu. Obejrzyj ten film i przykładowe kody: https://www.youtube.com/watch?v=DJtef410XaM Właśnie zastosowałem to, co jest w nim zalecane :). – cgte

Używanie regex do zastępowania danych pliku

Odpowiedz

Powiązane problemy