Potresti adattare questo codice che ho sviluppato una volta per partecipare a un contest:
Codice: Seleziona tutto
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <time.h>
#define FILE1 "../Files/DNA1.txt"
#define FILE2 "../Files/DNA2.txt"
#define BUFFER_SIZE 4096
#define MAX_STACK 100
#define PREFIX_SIZE 8
typedef struct tagRisultato
{
int pos1;
int pos2;
char p1[1024];
char p2[1024];
int len;
} Risultato;
typedef struct tagLista
{
int pos;
struct tagLista* next;
} Lista;
typedef struct tagTree
{
char key[PREFIX_SIZE + 1];
Lista *pLista;
struct tagTree *left;
struct tagTree *right;
} Tree;
Lista* ListNewNode(int val);
Lista* ListAppend(Lista* first, int val);
void ListFree(Lista* first);
Tree *TreeNewNode(char *pKey, int pos);
Tree *TreeInsertNode(Tree *node, char * pKey, int pos);
void TreeSearch(Tree *head, Tree **result, char *pKey);
void TreeFree(Tree *head);
Lista* ListNewNode(int val)
{
Lista *n;
n = (Lista *)malloc(sizeof(Lista));
if( n == NULL )
return NULL;
n->pos = val;
n->next = NULL;
return n;
}
Lista* ListAppend(Lista* first, int val)
{
Lista *n = first, *nuovo;
if ( first == NULL )
return ListNewNode(val);
n = first;
while( n->next != NULL )
{
n = n->next;
}
nuovo = ListNewNode(val);
n->next = nuovo;
return first;
}
void ListFree(Lista* first)
{
Lista *n1 = first, *n2;
while ( n1 != NULL )
{
n2 = n1->next;
free(n1);
n1 = n2;
}
}
Tree *TreeNewNode(char *pKey, int pos)
{
Tree *r;
Lista *l;
r = (Tree *) malloc(sizeof(Tree));
if(!r)
{
printf("Memoria insufficiente.\n");
return NULL;
}
l = (Lista *) malloc(sizeof(Lista));
if(!l)
{
printf("Memoria insufficiente.\n");
return NULL;
}
l->pos = pos;
l->next = NULL;
strcpy(r->key, pKey);
r->pLista = l;
r->left = NULL;
r->right = NULL;
return r;
}
Tree *TreeInsertNode(Tree *node, char * pKey, int pos)
{
int res;
Tree *pRadice = NULL;
if( !node )
{
node = TreeNewNode(pKey, pos);
return node;
}
pRadice = node;
while( 1 )
{
res = strcmp(pKey, node->key);
if ( res < 0 )
{
if ( !node->left )
{
node->left = TreeNewNode(pKey, pos);
break;
}
node = node->left;
}
else if ( res > 0 )
{
if ( !node->right )
{
node->right = TreeNewNode(pKey, pos);
break;
}
node = node->right;
}
else
{
node->pLista = ListAppend(node->pLista, pos);
break;
}
}
node = pRadice;
return node;
}
void TreeSearch(Tree *head, Tree **result, char *pKey)
{
int res;
Tree *node;
*result = NULL;
node = head;
if ( !head )
return;
while( 1 )
{
res = strcmp(pKey, node->key);
if ( res < 0 )
{
if ( !node->left )
break;
node = node->left;
}
else if ( res > 0 )
{
if ( !node->right )
break;
node = node->right;
}
else /* key == node->data */
{
*result = node;
break;
}
}
}
void TreeFree(Tree *head)
{
Tree *temp1, *temp2;
Tree *stack[MAX_STACK];
int top;
top = 0;
if ( !head )
return;
temp1 = temp2 = head;
while ( temp1 != NULL )
{
for(; temp1->left != NULL; temp1 = temp1->left)
stack[top++] = temp1;
while ( (temp1 != NULL) && (temp1->right == NULL || temp1->right == temp2) )
{
temp2 = temp1;
ListFree(temp2->pLista);
free(temp2);
if ( top == 0 )
return;
temp1 = stack[--top];
}
stack[top++] = temp1;
temp1 = temp1->right;
}
}
int LeggiDimensioniFile(char *szFileName)
{
FILE *fp;
long k;
fp = fopen(szFileName, "rb");
if ( fp == NULL )
return 0;
if ( fseek(fp, 0, SEEK_END) )
{
fclose(fp);
return 0;
}
k = ftell(fp);
fclose(fp);
return k;
}
int LeggiStringa(char *szFileName, char *buffer, int dimFile)
{
FILE *fp;
fp = fopen(szFileName, "r");
if ( fp == NULL )
return 0;
if ( fgets(buffer, dimFile+1, fp) == NULL )
{
printf("\nErrore nella lettura del file %s\n", szFileName);
fclose(fp);
return 0;
}
*(buffer + dimFile) = '\0';
fclose(fp);
return dimFile;
}
void Trova(char *szNomeFile1, char *szNomeFile2, Risultato *pRisultato)
{
Tree *pTree;
Tree *pNode;
Lista *pLista;
int p1_len, p2_len;
int k;
int len, len_prec;
int numErrors;
int bTrovato;
int MaxPrefix;
int x;
char strSearch[1024] = "";
char strRes[1024] = "";
char strTemp1[1024] = "";
char strTemp2[1024] = "";
int pos1, pos2;
int pos1x, pos2x;
int post2;
char *p1 = NULL;
char *p2 = NULL;
len = 0;
pos1x = pos2x = 0;
p1_len = LeggiDimensioniFile(szNomeFile1);
p2_len = LeggiDimensioniFile(szNomeFile2);
p1 = (char*)malloc(sizeof(char)*p1_len + 1);
if ( !p1 )
{
printf("Errore nell'allocazione della memoria.");
return;
}
p1[0] = '\0';
p2 = (char*)malloc(sizeof(char)*p2_len + 1);
if ( !p2 )
{
printf("Errore nell'allocazione della memoria.");
return;
}
p2[0] = '\0';
if ( !LeggiStringa(szNomeFile1, p1, p1_len) )
return;
if ( !LeggiStringa(szNomeFile2, p2, p2_len) )
return;
if ( PREFIX_SIZE < p2_len )
MaxPrefix = PREFIX_SIZE;
else
MaxPrefix = p2_len;
if ( p1_len < PREFIX_SIZE )
{
pRisultato->len = len;
pRisultato->pos1 = pos1x;
pRisultato->pos2 = pos2x;
strcpy(pRisultato->p1, strTemp1);
strcpy(pRisultato->p2, strTemp2);
free(p1);
free(p2);
return;
}
for ( x = MaxPrefix; x > 0; x--)
{
pTree = NULL;
pos1 = 0;
while ( pos1 < p1_len - x )
{
memcpy(strSearch, p1 + pos1, x);
*(strSearch + x) = '\0';
pTree = TreeInsertNode(pTree, strSearch, pos1);
pos1++;
}
pos1 = 0;
pos2 = 0;
strTemp1[0] = '\0';
strTemp2[0] = '\0';
strSearch[0] = '\0';
pLista = NULL;
pNode = NULL;
len = 0;
len_prec = 0;
numErrors = 0;
bTrovato = 0;
while ( pos2 < p2_len - x )
{
memcpy(strSearch, p2 + pos2, x);
*(strSearch + x) = '\0';
TreeSearch(pTree, &pNode, strSearch);
if ( pNode != NULL )
{
pLista = pNode->pLista;
while ( pLista != NULL )
{
pos1 = pLista->pos;
numErrors = 0;
k = x;
post2 = pos2;
if ( pos2 > x && pos1 > x )
{
k = 0;
while ( (*(p2 + pos2) == *(p1 + pos1)) || (numErrors < 2) )
{
if ( *(p2 + pos2) != *(p1 + pos1) )
numErrors++;
pos1--;
pos2--;
if ( pos2 < 0 || pos1 < 0 )
break;
}
pos1 += 1;
pos2 += 1;
numErrors = 0;
}
while ( (*(p2 + pos2 + k) == *(p1 + pos1 + k)) || (numErrors < 2) )
{
if ( *(p2 + pos2 + k) != *(p1 + pos1 + k) )
numErrors++;
k++;
}
if ( k > len_prec )
{
len = k;
pos1x = pos1;
pos2x = pos2;
len_prec = len;
bTrovato = 1;
}
pos2 = post2;
pLista = pLista->next;
}
}
pos2++;
}
TreeFree(pTree);
if ( bTrovato )
break;
}
pos1 = pos1x;
pos2 = pos2x;
len_prec = len;
numErrors = 0;
while ( (*(p1 + pos1) == *(p2 + pos2)) || (numErrors < 2) )
{
if ( *(p2 + pos2) != *(p1 + pos1) )
numErrors++;
pos2++;
pos1++;
if ( numErrors == 2 )
break;
}
len = 0;
numErrors = 0;
while ( (*(p1 + pos1 + len) == *(p2 + pos2 + len)) || (numErrors < 2) )
{
if ( *(p2 + pos2 + len) != *(p1 + pos1 + len) )
numErrors++;
len++;
}
if ( len_prec < len )
{
pos1x = pos1;
pos2x = pos2;
}
else
{
len = len_prec;
}
memcpy(strTemp1, p1 + pos1x, len);
*(strTemp1 + len) = '\0';
memcpy(strTemp2, p2 + pos2x, len);
*(strTemp2 + len) = '\0';
sprintf(strRes,
"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
strTemp1,
strTemp2,
len,
pos1x,
pos2x);
pRisultato->len = len;
pRisultato->pos1 = pos1x;
pRisultato->pos2 = pos2x;
strcpy(pRisultato->p1, strTemp1);
strcpy(pRisultato->p2, strTemp2);
x = len_prec + 1;
pos1 = 0;
pos2 = pos2x + 1;
strTemp1[0] = '\0';
strSearch[0] = '\0';
len_prec = len;
len = 0;
numErrors = 0;
memcpy(strSearch, p2 + pos2, x);
*(strSearch + x) = '\0';
k = 0;
while ( *(p1 + k) != *(p2 + k) )
k++;
pos1 = k;
while ( pos1 < p1_len - x )
{
memcpy(strTemp1, p1 + pos1, x);
*(strTemp1 + x) = '\0';
k = 0;
while ( k < x )
{
if ( *(strSearch + k) != *(strTemp1 + k) )
numErrors++;
if ( numErrors > 2 )
break;
k++;
}
if ( k >= x && len_prec > len )
{
len = k;
pos1x = pos1;
pos2x = pos2;
len_prec = len;
memcpy(strTemp1, p1 + pos1x, len);
*(strTemp1 + len) = '\0';
memcpy(strTemp2, p2 + pos2x, len);
*(strTemp2 + len) = '\0';
sprintf(strRes,
"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
strTemp1,
strTemp2,
len,
pos1x,
pos2x);
pRisultato->len = len;
pRisultato->pos1 = pos1x;
pRisultato->pos2 = pos2x;
strcpy(pRisultato->p1, strTemp1);
strcpy(pRisultato->p2, strTemp2);
break;
}
pos1++;
}
free(p1);
free(p2);
}
/*
gcc -Wall -Wextra -pedantic -O2 Contest04BParz.c -o c04
*/
int main()
{
Risultato ris;
char strTempo[512];
char strRes[1024];
clock_t c_start, c_end;
c_start = clock();
Trova(FILE1, FILE2, &ris);
c_end = clock();
sprintf(strRes,
"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
ris.p1,
ris.p2,
ris.len,
ris.pos1,
ris.pos2);
printf(strRes);
sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", (double)(c_end - c_start) / CLOCKS_PER_SEC);
printf(strTempo);
return 0;
}
In pratica leggiamo due file, DNA1.txt(100058 caratteri) e DNA2.txt(80058 caratteri) contenenti, ciascuno, una lunga sequenza di DNA.
Il programma trova la più lunga sottostringa comune che differisce, al più, per due caratteri.