Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // INCLUDE STATEMENTS
- #include "Python.h"
- #include <structmember.h>
- // MACRO DEFINITIONS
- #define SAFE_FREE(x) if (x != NULL){free(x);x = NULL;}
- #define SAFE_FREE_WITH_DECREF(x, m) { \
- if (x != NULL) { \
- int g; \
- for (g = 0; g < m; g++) { \
- Py_DECREF(x[g]); \
- } \
- free(x); \
- x = NULL; \
- } \
- }
- #define ASSIGN_MEMBER_NAME(self, x, n) {tmp = self->n; Py_INCREF(x); self->n = x; Py_XDECREF(tmp);}
- #define ASSIGN_MEMBER(self, x) ASSIGN_MEMBER_NAME(self, x, x)
- #define PRINT(x) printf("%s\n", PyString_AsString(PyObject_Str(x)))
- #define GENERIC_GETTER(attr) {Py_INCREF(self->attr); return self->attr;}
- #define TYPE_CHECK(typeChecker, msg) if (value != Py_None && !typeChecker(value)) \
- {PyErr_SetString(PyExc_TypeError, msg); return -1;}
- #ifndef max
- #define max(a, b) ( ((a) > (b)) ? (a) : (b) )
- #endif
- #ifndef min
- #define min(a, b) ( ((a) < (b)) ? (a) : (b) )
- #endif
- // TYPE DEFINITIONS
- // Simple struct to get around C's inability to pass arrays between functions.
- typedef struct
- {
- size_t* arr;
- size_t len;
- } ARRAY;
- // Function for freeing an ARRAY*.
- static void free_array(ARRAY* array)
- {
- free(array->arr);
- free(array);
- }
- /*
- * Struct representing a differ to find the longest common subsequence or "diff" of two sequences.
- * Fields:
- * aIndices List mapping indices in seqA to indices in the original seqA (before junk filtering)
- * bIndices List mapping indices in seqB to indices in the original seqB (before junk filtering)
- * eqTest A Python function that, given an element of seqA and one of seqB, returns
- * True if they are equal.
- * junkTest A Python function that, when called on an element of seqA or seqB, returns
- * True if the element is "junk" (to be ignored) and false otherwise.
- * lcs Holds the indices of the longest common subsequence of seqA and seqB.
- * Interleaved 1-D array that is twice as long as the LCS. Saved for optimization.
- * lengths 2-D table created during the LCS algorithm to hold the length of the LCS
- * for every possible prefix combination of seqA and seqB. Saved for optimization.
- * Simulated using a 1-D array; the row width is seqBLen - matchingPrefixLen
- * - matchingPostfixLen + 1 and the column is the same for seqALen.
- * matchingPostfixLen The length of the common subsequence at the end of both sequences.
- * matchingPrefixLen The length of the common subsequence at the start of both sequences.
- * rowSetup A Python function that will be called before each new "row" of the LCS length
- * table--that is, before proceeding to compare elements of seqB with a new
- * element of seqA, with the seqA element as its argument.
- * seqA The first sequence to compare.
- * seqALen The number of elements in the first sequence.
- * seqAList The original list given to specify seqA.
- * seqB The second sequence to compare.
- * seqBLen The number of elements in the second sequence.
- * seqBList The original list given to specify seqB.
- */
- typedef struct
- {
- PyObject_HEAD
- PyObject* seqAList;
- PyObject** seqA;
- size_t seqALen;
- size_t* aIndices;
- PyObject* seqBList;
- PyObject** seqB;
- size_t seqBLen;
- size_t* bIndices;
- size_t matchingPrefixLen;
- size_t matchingPostfixLen;
- size_t* lengths;
- ARRAY* lcs;
- PyObject* eqTest;
- PyObject* junkTest;
- PyObject* rowSetup;
- } DIFFER;
- // FUNCTION DEFINITIONS
- // Resets the calculated LCS information of a DIFFER*.
- // Frees and nulls lengths.
- // Frees and nulls lcs.
- static void reset_differ_lcs(DIFFER* differ)
- {
- SAFE_FREE(differ->lengths);
- if (differ->lcs != NULL)
- {
- free_array(differ->lcs);
- differ->lcs = NULL;
- }
- }
- // Resets the first sequence of the DIFFER* to be uninitialized, along with all data dependent on it.
- // Frees and nulls seqA, decrements the reference count of its members.
- // Sets seqALen to 0.
- // Frees and nulls aIndices.
- // Decrements the reference count of seqAList, nulls it.
- // Sets the matchingPrefixLength and matchingPostfixLength to 0.
- // Resets the LCS information.
- static void reset_differ_seqA(DIFFER* differ)
- {
- SAFE_FREE_WITH_DECREF(differ->seqA, differ->seqALen);
- differ->seqALen = 0;
- SAFE_FREE(differ->aIndices);
- if (differ->seqAList != NULL)
- {
- Py_DECREF(differ->seqAList);
- differ->seqAList = NULL;
- }
- differ->matchingPrefixLen = 0;
- differ->matchingPostfixLen = 0;
- reset_differ_lcs(differ);
- }
- // Resets the second sequence of the DIFFER* to be uninitialized, along with all data dependent on it.
- // Frees and nulls seqB, decrements the reference count of its members.
- // Sets seqBLen to 0.
- // Frees and nulls bIndices.
- // Decrements the reference count of seqBList, nulls it.
- // Sets the matchingPrefixLength and matchingPostfixLength to 0.
- // Resets the LCS information.
- static void reset_differ_seqB(DIFFER* differ)
- {
- SAFE_FREE_WITH_DECREF(differ->seqB, differ->seqBLen);
- differ->seqBLen = 0;
- SAFE_FREE(differ->bIndices);
- if (differ->seqBList != NULL)
- {
- Py_DECREF(differ->seqBList);
- differ->seqBList = NULL;
- }
- differ->matchingPrefixLen = 0;
- differ->matchingPostfixLen = 0;
- reset_differ_lcs(differ);
- }
- // Function for freeing a DIFFER* and all of its subfields.
- // Resets both sequences of the differ.
- // Decrements the reference count of eqTest, junkTest, and rowSetup. (Assumes they are not NULL)
- // Calls the tp_free member of the differ's type.
- static void free_differ(DIFFER* differ)
- {
- reset_differ_seqA(differ);
- reset_differ_seqB(differ);
- Py_DECREF(differ->eqTest);
- Py_DECREF(differ->junkTest);
- Py_DECREF(differ->rowSetup);
- differ->ob_type->tp_free((PyObject*)differ);
- }
- /* Filters a Python sequence and returns a filtered list of indices.
- * Inputs:
- * seq sequence The sequence to filter.
- * junkTest function A function that returns true for "junk" elements of the sequence.
- * Outputs:
- * An allocated ARRAY* of indices corresponding to the non-junk sequence elemebts.
- */
- static ARRAY* filter_sequence(PyObject* seq, PyObject* junkTest)
- {
- ARRAY* indices = malloc(sizeof(ARRAY));
- size_t i;
- size_t seqLength = PyList_Size(seq);
- unsigned int goodCount = 0;
- unsigned char isGood[seqLength];
- PyObject* junkResult;
- // Check which entries of the list are junk
- for (i = 0; i < seqLength; i++)
- {
- junkResult = PyObject_CallFunctionObjArgs(junkTest, PyList_GetItem(seq, i), NULL);
- if (PyObject_Not(junkResult))
- {
- goodCount++;
- isGood[i] = 1;
- }
- else
- {
- isGood[i] = 0;
- }
- Py_DECREF(junkResult);
- }
- // Create indices array to return
- indices->len = goodCount;
- size_t* indicesArr = malloc(sizeof(size_t) * goodCount);
- int indI = 0;
- for (i = 0; i < seqLength; i++)
- {
- if (isGood[i])
- {
- indicesArr[indI] = i;
- indI++;
- }
- }
- indices->arr = indicesArr;
- return indices;
- }
- /* Sets one of the sequences of a Differ to a new value.
- * Inputs:
- * differ The differ to assign to.
- * seq The new sequence.
- * a If 1, the differ's seqA will be overwritten. If 0, seqB wil be overwritten.
- */
- static void _differ_set_seq(DIFFER* differ, PyObject* newSeq, char a)
- {
- int i;
- PyObject** seq;
- size_t seqLen;
- size_t* indicesArr;
- if (differ->junkTest != Py_None)
- {
- ARRAY* indices = filter_sequence(newSeq, differ->junkTest);
- seqLen = indices->len;
- indicesArr = indices->arr;
- free(indices);
- }
- else
- {
- seqLen = PyList_Size(newSeq);
- indicesArr = malloc(sizeof(size_t) * seqLen);
- for (i = 0; i < seqLen; i++)
- {
- indicesArr[i] = i;
- }
- }
- seq = malloc(sizeof(PyObject*) * seqLen);
- for (i = 0; i < seqLen; i++)
- {
- seq[i] = PyList_GetItem(newSeq, i);
- Py_INCREF(seq[i]);
- }
- PyObject* tmp;
- if (a)
- {
- reset_differ_seqA(differ);
- differ->seqA = seq;
- differ->seqALen = seqLen;
- differ->aIndices = indicesArr;
- ASSIGN_MEMBER_NAME(differ, newSeq, seqAList);
- }
- else
- {
- reset_differ_seqB(differ);
- differ->seqB = seq;
- differ->seqBLen = seqLen;
- differ->bIndices = indicesArr;
- ASSIGN_MEMBER_NAME(differ, newSeq, seqBList);
- }
- }
- //Custom setter/getter for a differ's sequence A that ensures data coherence and checks type
- static int py_differ_set_a(DIFFER* self, PyObject* value, void* closure)
- {
- TYPE_CHECK(PySequence_Check, "a must be a sequence");
- _differ_set_seq(self, value, 1);
- return 0;
- }
- static PyObject* py_differ_get_a(DIFFER* self, void* closure) {GENERIC_GETTER(seqAList)}
- //Custom setter/getter for a differ's sequence B that ensures data coherence and checks type
- static int py_differ_set_b(DIFFER* self, PyObject* value, void* closure)
- {
- TYPE_CHECK(PySequence_Check, "b must be a sequence");
- _differ_set_seq(self, value, 0);
- return 0;
- }
- static PyObject* py_differ_get_b(DIFFER* self, void* closure) {GENERIC_GETTER(seqBList)}
- static int py_differ_set_eqTest(DIFFER* self, PyObject* value, void* closure)
- {
- PyObject* tmp;
- TYPE_CHECK(PyFunction_Check, "eqTest must be a function");
- ASSIGN_MEMBER_NAME(self, value, eqTest);
- return 0;
- }
- static PyObject* py_differ_get_eqTest(DIFFER* self, void* closure) {GENERIC_GETTER(eqTest)}
- static int py_differ_set_junkTest(DIFFER* self, PyObject* value, void* closure)
- {
- PyObject* tmp;
- TYPE_CHECK(PyFunction_Check, "junkTest must be a function");
- ASSIGN_MEMBER_NAME(self, value, junkTest);
- return 0;
- }
- static PyObject* py_differ_get_junkTest(DIFFER* self, void* closure) {GENERIC_GETTER(junkTest)}
- static int py_differ_set_rowSetup(DIFFER* self, PyObject* value, void* closure)
- {
- PyObject* tmp;
- TYPE_CHECK(PyFunction_Check, "rowSetup must be a function");
- ASSIGN_MEMBER_NAME(self, value, rowSetup);
- return 0;
- }
- static PyObject* py_differ_get_rowSetup(DIFFER* self, void* closure) {GENERIC_GETTER(rowSetup)}
- // Initializes a DIFFER* struct with the given Python methods. If None, eqTest defaults to operator.eq.
- static void setup_differ(DIFFER* differ, PyObject* eqTest, PyObject* junkTest, PyObject* rowSetup)
- {
- differ->seqA = NULL;
- differ->seqALen = 0;
- differ->aIndices = NULL;
- differ->seqAList = NULL;
- differ->seqB = NULL;
- differ->seqBLen = 0;
- differ->bIndices = NULL;
- differ->seqBList = NULL;
- differ->matchingPrefixLen = 0;
- differ->matchingPostfixLen = 0;
- differ->lengths = NULL;
- differ->lcs = NULL;
- PyObject* tmp;
- if (eqTest != Py_None)
- {
- py_differ_set_eqTest(differ, eqTest, NULL);
- }
- else
- {
- PyObject* modules = PyImport_GetModuleDict();
- PyObject* opMod = PyDict_GetItemString(modules, "operator");
- PyObject* eq = PyObject_GetAttrString(opMod, "eq");
- ASSIGN_MEMBER_NAME(differ, eq, eqTest)
- }
- py_differ_set_junkTest(differ, junkTest, NULL);
- py_differ_set_rowSetup(differ, rowSetup, NULL);
- }
- /* Initializes the Differ object.
- * Python API:
- * a sequence The first sequence to compare.
- * b sequence The second sequence to compare.
- * eqTest function A function taking one element of a and one of b that returns true iff
- * they are considered "equal". If None, defaults to operator.eq.
- * junkTest function A function taking one element of a or b that returns True iff it is
- * to be considered "junk" and not compared. If None, no elements will be
- * considered junk.
- * rowSetup function A function that is called with each successive element of a just before
- * it is compared. The element of a that is supplied as the first argument
- * of eqTest will not change in between calls to rowSetup. Useful for
- * equality tests that can be optimized if only one element being compared
- * changes at a time. If None, does not call any row setup function.
- */
- static int Differ_init(DIFFER* self, PyObject* args, PyObject* kwds)
- {
- PyObject* a;
- PyObject* b;
- PyObject* eqTest;
- PyObject* junkTest;
- PyObject* rowSetup;
- if (!PyArg_ParseTuple(args, "OOOOO", &a, &b, &eqTest, &junkTest, &rowSetup))
- {
- return -1;
- }
- setup_differ(self, eqTest, junkTest, rowSetup);
- py_differ_set_a(self, a, NULL);
- py_differ_set_b(self, b, NULL);
- return 0;
- }
- // Creates and returns a new Differ object
- static PyObject* Differ_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
- {
- DIFFER* differ = (DIFFER*)type->tp_alloc(type, 0);
- setup_differ(differ, Py_None, Py_None, Py_None);
- return (PyObject*)differ;
- }
- // Methods for cyclic garbage collection
- static int Differ_traverse(DIFFER* self, visitproc visit, void* arg)
- {
- Py_VISIT(self->seqAList);
- Py_VISIT(self->seqBList);
- return 0;
- }
- static int Differ_clear(DIFFER* self)
- {
- Py_CLEAR(self->seqAList);
- Py_CLEAR(self->seqBList);
- return 0;
- }
- // LCS ALGORITHM FUNCTIONS
- // Calculates the length of the LCS of the sequences; fills out the lengths table in the process
- // Uses the LCS algorithm from "Introduction to Algorithms", 3rd edition, by Cormen, Leiserson,
- // Rivest, and Stein.
- // This function is by far the most time-consuming, so optimize it!
- static size_t lcs_length(DIFFER* differ)
- {
- size_t mpl;
- size_t mpsl;
- size_t m;
- size_t n;
- size_t mBound;
- size_t nBound;
- size_t last;
- size_t i;
- size_t j;
- if (differ->lengths == NULL)
- {
- PyObject** seqA = differ->seqA;
- m = differ->seqALen;
- PyObject** seqB = differ->seqB;
- n = differ->seqBLen;
- int o;
- PyObject* eqTest = differ->eqTest;
- PyObject* rowSetup = differ->rowSetup;
- PyObject* result;
- char eq;
- //Check for matching prefix and postfix
- mpl = 0;
- if (rowSetup != Py_None && m > 0)
- {
- Py_DECREF(PyObject_CallFunctionObjArgs(rowSetup, seqA[0], NULL));
- }
- while (mpl < m)
- {
- result = PyObject_CallFunctionObjArgs(eqTest, seqA[mpl], seqB[mpl], NULL);
- eq = PyObject_IsTrue(result);
- Py_DECREF(result);
- if (!eq)
- {
- break;
- }
- else
- {
- mpl++;
- if (mpl < m && rowSetup != Py_None)
- {
- Py_DECREF(PyObject_CallFunctionObjArgs(rowSetup, seqA[mpl], NULL));
- }
- }
- }
- differ->matchingPrefixLen = mpl;
- size_t maxA = m - 1;
- size_t maxB = n - 1;
- m -= mpl;
- n -= mpl;
- o = min(m, n);
- mpsl = 0;
- if (rowSetup != Py_None && m > 0)
- {
- Py_DECREF(PyObject_CallFunctionObjArgs(rowSetup, seqA[maxA], NULL));
- }
- while (mpsl < o)
- {
- result = PyObject_CallFunctionObjArgs(eqTest, seqA[maxA - mpsl], seqB[maxB - mpsl], NULL);
- if (PyObject_Not(result))
- {
- Py_DECREF(result);
- break;
- }
- else
- {
- Py_DECREF(result);
- mpsl++;
- if (mpsl < o && rowSetup != Py_None)
- {
- Py_DECREF(PyObject_CallFunctionObjArgs(rowSetup, seqA[maxA - mpsl], NULL));
- }
- }
- }
- differ->matchingPostfixLen = mpsl;
- m -= mpsl;
- n -= mpsl;
- //Height of the table
- mBound = m + 1;
- //Width of the table
- nBound = n + 1;
- last = mBound * nBound - 1;
- PyObject* rowElement;
- //Main LCS algorithm
- //Simulated 2D array; lengths[i][j] -> lengths[nBound * i + j]
- size_t* lengths = malloc(sizeof(size_t) * mBound * nBound);
- for (i = 0; i < mBound; i++)
- {
- lengths[i*nBound] = 0;
- }
- for (i = 1; i < nBound; i++)
- {
- lengths[i] = 0;
- }
- size_t up = 0;
- size_t current = nBound;
- if (m > 0 && n > 0)
- {
- seqA = seqA + mpl;
- seqB = seqB + mpl;
- for (i = 1; i < mBound; i++)
- {
- lengths[current] = 0;
- up++;
- current++;
- rowElement = seqA[i-1];
- if (rowSetup != Py_None)
- {
- Py_DECREF(PyObject_CallFunctionObjArgs(rowSetup, rowElement, NULL));
- }
- for (j = 1; j < nBound; j++)
- {
- result = PyObject_CallFunctionObjArgs(eqTest, rowElement, seqB[j-1], NULL);
- eq = PyObject_IsTrue(result);
- Py_DECREF(result);
- if (eq)
- {
- lengths[current] = lengths[up-1] + 1;
- }
- else if (lengths[up] >= lengths[current-1])
- {
- lengths[current] = lengths[up];
- }
- else
- {
- lengths[current] = lengths[current-1];
- }
- up++;
- current++;
- }
- }
- }
- differ->lengths = lengths;
- }
- mpl = differ->matchingPrefixLen;
- mpsl = differ->matchingPostfixLen;
- m = differ->seqALen;
- n = differ->seqBLen;
- mBound = m - mpl - mpsl + 1;
- nBound = n - mpl - mpsl + 1;
- last = mBound * nBound - 1;
- return differ->lengths[last] + mpl + mpsl;
- }
- // Returns a one-dimensional array with the LCS indices of the two sequences interleaved (that is,
- // the indices of the LCS elements in the filtered sequences, first a then b)
- static ARRAY* lcs_indices(DIFFER* differ)
- {
- size_t i;
- size_t j;
- if (differ->lcs == NULL)
- {
- size_t lcsLen = lcs_length(differ);
- ARRAY* lcs = malloc(sizeof(ARRAY));
- lcs->len = lcsLen;
- size_t* lcsArr = malloc(sizeof(size_t) * lcsLen * 2);
- lcs->arr = lcsArr;
- if (lcsLen > 0)
- {
- size_t m = differ->seqALen;
- size_t n = differ->seqBLen;
- // printf("m=%d\tn=%d\n", m, n);
- size_t mpl = differ->matchingPrefixLen;
- size_t mpsl = differ->matchingPostfixLen;
- PyObject** seqA = differ->seqA;
- PyObject** seqB = differ->seqB;
- size_t* lengths = differ->lengths;
- size_t mBound = m - mpl - mpsl + 1;
- size_t nBound = n - mpl - mpsl + 1;
- PyObject* eqTest = differ->eqTest;
- PyObject* rowSetup = differ->rowSetup;
- for (i = 0; i < mpl; i++)
- {
- lcsArr[2 * i] = i;
- lcsArr[2 * i + 1] = i;
- }
- for (i = 0; i < mpsl; i++)
- {
- lcsArr[2*(lcsLen - i - 1)] = m - i - 1;
- lcsArr[2*(lcsLen - i - 1) + 1] = n - i - 1;
- }
- // printf("mpl=%d\tmpsl=%d\n", mpl, mpsl);
- size_t lcsIndex = 2*(lcsLen - 1 - mpsl);
- // printf("lcsLen=%d\tlcsIndex=%d\n", lcsLen, lcsIndex);
- i = mBound - 1;
- j = nBound - 1;
- PyObject* result;
- char eq;
- PyObject* rowElement = seqA[mpl+i-1];
- if (rowSetup != Py_None)
- {
- Py_DECREF(PyObject_CallFunctionObjArgs(rowSetup, rowElement, NULL));
- }
- while (i >= 0 && j >= 0 && lcsIndex >= 2*mpl)
- {
- result = PyObject_CallFunctionObjArgs(eqTest, rowElement, seqB[mpl+j-1], NULL);
- // printf("i=%d\tj=%d\tlcsI=%d\n", i, j, lcsIndex);
- eq = PyObject_IsTrue(result);
- Py_DECREF(result);
- if (eq)
- {
- lcsArr[lcsIndex] = mpl + i - 1;
- lcsArr[lcsIndex+1] = mpl + j - 1;
- lcsIndex -= 2;
- i -= 1;
- rowElement = seqA[mpl+i-1];
- if (rowSetup != Py_None)
- {
- Py_DECREF(PyObject_CallFunctionObjArgs(rowSetup, rowElement, NULL));
- }
- j -= 1;
- }
- else if (lengths[(i-1)*nBound + j] >= lengths[i*nBound + j - 1])
- {
- i -= 1;
- rowElement = seqA[mpl+i-1];
- if (rowSetup != Py_None)
- {
- Py_DECREF(PyObject_CallFunctionObjArgs(rowSetup, rowElement, NULL));
- }
- }
- else
- {
- j -= 1;
- }
- }
- }
- differ->lcs = lcs;
- }
- return differ->lcs;
- }
- /* Python wrapper function for LCS length algorithm.
- * Outputs:
- * The length of the LCS, as an integer.
- */
- static PyObject* py_lcs_length(DIFFER* self)
- {
- size_t length = lcs_length(self);
- return Py_BuildValue("I", length);
- }
- /* Python wrapper function for LCS indices algorithm
- * Outputs:
- * The indices of the elements of the LCS in each (filtered) sequence, as a list of 2-tuples.
- */
- static PyObject* py_lcs_indices(DIFFER* self)
- {
- ARRAY* lcs = lcs_indices(self);
- size_t lcsLen = lcs->len;
- size_t* lcsArr = lcs->arr;
- PyObject* lcsList = PyList_New(lcsLen);
- size_t i;
- for (i = 0; i < lcsLen; i++)
- {
- PyList_SET_ITEM(lcsList, i, Py_BuildValue("(II)", lcsArr[2*i], lcsArr[2*i+1]));
- }
- return lcsList;
- }
- /* Python function returning the elements of the LCS in a list.
- * Outputs:
- * A list of the actual elements of the LCS.
- */
- static PyObject* py_lcs(DIFFER* self)
- {
- ARRAY* lcs = lcs_indices(self);
- size_t* lcsArr = lcs->arr;
- size_t lcsLen = lcs->len;
- PyObject* result = PyList_New(lcsLen);
- PyObject** seqA = self->seqA;
- size_t i;
- for (i = 0; i < lcsLen; i++)
- {
- Py_INCREF(seqA[lcsArr[2*i]]);
- PyList_SET_ITEM(result, i, seqA[lcsArr[2*i]]);
- }
- return result;
- }
- // Underlying index conversion method.
- static size_t _convert_index(DIFFER* differ, size_t i, char a)
- {
- if (a)
- {
- if (i < differ->seqALen)
- {
- return differ->aIndices[i];
- }
- else
- {
- return differ->seqALen;
- }
- }
- else
- {
- if (i < differ->seqBLen)
- {
- return differ->bIndices[i];
- }
- else
- {
- return differ->seqBLen;
- }
- }
- }
- // Converts an index on a differ's filtered sequence A to an index in the original sequence A.
- static size_t convert_a_index(DIFFER* differ, size_t i)
- {
- return _convert_index(differ, i, 1);
- }
- // Converts an index on a differ's filtered sequence B to an index in the original sequence B.
- static size_t convert_b_index(DIFFER* differ, size_t i)
- {
- return _convert_index(differ, i, 0);
- }
- // Finds the length of an interval in a diffr's ORIGINAL sequence A, from indices on the filtered
- static size_t convert_a_len(DIFFER* differ, size_t i1, size_t i2)
- {
- size_t convertedI1 = _convert_index(differ, i1, 1);
- if (i1 != i2)
- {
- return differ->aIndices[i2-1] + 1 - convertedI1;
- }
- else
- {
- return 0;
- }
- }
- // Finds the length of an interval in a diffr's ORIGINAL sequence B, from indices on the filtered
- static size_t convert_b_len(DIFFER* differ, size_t i1, size_t i2)
- {
- size_t convertedI1 = _convert_index(differ, i1, 0);
- if (i1 != i2)
- {
- return differ->bIndices[i2-1] + 1 - convertedI1;
- }
- else
- {
- return 0;
- }
- }
- /* Returns a list of 3-tuples with information on the "blocks" of the sequences that matched.
- * Outputs:
- * A list of 3-tuples, one for each matching block, of the form (aStart, bStart, len);
- * the indices and length are in terms of the original, unfiltered sequences.
- */
- static PyObject* py_get_matching_blocks(DIFFER* self)
- {
- ARRAY* lcs = lcs_indices(self);
- size_t* lcsArr = lcs->arr;
- size_t lcsLen = lcs->len;
- PyObject* result = PyList_New(0);
- if (lcsLen == 0)
- {
- return result;
- }
- size_t i;
- size_t j;
- size_t x;
- size_t curBlockI = -1;
- size_t curBlockJ = -1;
- size_t curBlockLength = 0;
- for (x = 0; x < lcsLen; x++)
- {
- i = lcsArr[2*x];
- j = lcsArr[2*x+1];
- if (curBlockI != -1)
- {
- if (i > curBlockI + curBlockLength || j > curBlockJ + curBlockLength)
- {
- if (PyList_Append(result, Py_BuildValue("III", convert_a_index(self, curBlockI),
- convert_b_index(self, curBlockJ),
- curBlockLength)) != 0)
- return NULL;
- curBlockI = i;
- curBlockJ = j;
- curBlockLength = 1;
- }
- else
- {
- curBlockLength++;
- }
- }
- else
- {
- curBlockI = i;
- curBlockJ = j;
- curBlockLength = 1;
- }
- }
- if (lcsArr[lcsLen*2 - 2] == self->seqALen - 1 && lcsArr[lcsLen*2 -1] == self->seqBLen - 1)
- {
- if (PyList_Append(result, Py_BuildValue("III", convert_a_index(self, curBlockI),
- convert_b_index(self, curBlockJ),
- curBlockLength)) != 0)
- return NULL;
- }
- return result;
- }
- /* Returns a list of 4-tuples with information on the "blocks" of the sequences that didn't match.
- * Outputs:
- * A list of 4-tuples, one for each differing block, of the form (aStart, aLen, bStart, bLen);
- * the indices and length are in terms of the original, unfiltered sequences.
- */
- static PyObject* py_get_differing_blocks(DIFFER* self)
- {
- ARRAY* lcs = lcs_indices(self);
- size_t* lcsArr = lcs->arr;
- size_t lcsLen = lcs->len;
- PyObject* result = PyList_New(0);
- if (lcsLen == 0)
- {
- return result;
- }
- size_t i;
- size_t j;
- size_t x;
- size_t prevI = -1;
- size_t prevJ = -1;
- size_t trueI;
- size_t trueLenI;
- size_t trueJ;
- size_t trueLenJ;
- if (lcsArr[0] != 0 || lcsArr[1] != 0)
- {
- trueI = convert_a_index(self, 0);
- trueLenI = convert_a_len(self, 0, lcsArr[0]);
- trueJ = convert_b_index(self, 0);
- trueLenJ = convert_b_len(self, 0, lcsArr[1]);
- printf("Appending (beginning)\n");
- if (PyList_Append(result, Py_BuildValue("IIII", trueI, trueLenI, trueJ, trueLenJ)) != 0)
- return NULL;
- }
- for (x = 0; x < lcsLen; x++)
- {
- i = lcsArr[2*x];
- j = lcsArr[2*x+1];
- if (prevI != -1 && (i > prevI + 1 || j > prevJ + 1))
- {
- trueI = convert_a_index(self, prevI + 1);
- trueLenI = convert_a_len(self, prevI + 1, i);
- trueJ = convert_b_index(self, prevJ + 1);
- trueLenJ = convert_b_len(self, prevJ + 1, j);
- printf("Appending (loop)\n");
- if (PyList_Append(result, Py_BuildValue("IIII", trueI, trueLenI, trueJ, trueLenJ)) != 0)
- return NULL;
- }
- prevI = i;
- prevJ = j;
- }
- printf("%d %d %d %d\n", self->seqALen, self->seqBLen, lcsArr[lcsLen*2 - 2], lcsArr[lcsLen*2 - 1]);
- if (lcsArr[lcsLen*2 - 2] < self->seqALen - 1 || lcsArr[lcsLen*2 -1] < self->seqBLen - 1)
- {
- trueI = convert_a_index(self, lcsArr[lcsLen*2-2]+1);
- trueLenI = convert_a_len(self, lcsArr[lcsLen*2-2]+1, self->seqALen);
- trueJ = convert_b_index(self, lcsArr[lcsLen*2-1]+1);
- trueLenJ = convert_b_len(self, lcsArr[lcsLen*2-1]+1, self->seqBLen);
- printf("Appending (end)\n");
- if (PyList_Append(result, Py_BuildValue("IIII", trueI, trueLenI, trueJ, trueLenJ)) != 0)
- return NULL;
- }
- return result;
- }
- //Getter/setter definitions for Differ class
- static PyGetSetDef Differ_getsetters[] = {
- {"a", (getter)py_differ_get_a, (setter)py_differ_set_a, "The first sequence", NULL},
- {"b", (getter)py_differ_get_b, (setter)py_differ_set_b, "The second sequence", NULL},
- {"eqTest", (getter)py_differ_get_eqTest, (setter)py_differ_set_eqTest,
- "The equality test (2-argument)", NULL},
- {"junkTest", (getter)py_differ_get_junkTest, (setter)py_differ_set_junkTest,
- "The junk test (1-argument)", NULL},
- {"rowSetup", (getter)py_differ_get_rowSetup, (setter)py_differ_set_rowSetup,
- "The row setup function (1-argument)", NULL},
- {NULL}
- };
- //Other member variable definitions for Differ
- static PyMemberDef Differ_members[] = {
- {NULL}
- };
- //Object methods for Differ
- static PyMethodDef Differ_methods[] = {
- {"LCSLength", (PyCFunction)py_lcs_length, METH_NOARGS, NULL},
- {"LCSIndices", (PyCFunction)py_lcs_indices, METH_NOARGS, NULL},
- {"LCS", (PyCFunction)py_lcs, METH_NOARGS, NULL},
- {"GetMatchingBlocks", (PyCFunction)py_get_matching_blocks, METH_NOARGS, NULL},
- {"GetDifferingBlocks", (PyCFunction)py_get_differing_blocks, METH_NOARGS, NULL},
- {NULL}
- };
- //Module methods for _diff
- static PyMethodDef module_methods[] = {
- {NULL}
- };
- // Differ type definition
- static PyTypeObject DIFFER_Type = {
- PyObject_HEAD_INIT(NULL)
- 0, /*ob_size*/
- "diff.Differ", /*tp_name*/
- sizeof(DIFFER), /*tp_basicsize*/
- 0, /*tp_itemsize*/
- (destructor)free_differ, /*tp_dealloc*/
- 0, /*tp_print*/
- 0, /*tp_getattr*/
- 0, /*tp_setattr*/
- 0, /*tp_compare*/
- 0, /*tp_repr*/
- 0, /*tp_as_number*/
- 0, /*tp_as_sequence*/
- 0, /*tp_as_mapping*/
- 0, /*tp_hash */
- 0, /*tp_call*/
- 0, /*tp_str*/
- 0, /*tp_getattro*/
- 0, /*tp_setattro*/
- 0, /*tp_as_buffer*/
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
- "Differ objects", /* tp_doc */
- (traverseproc)Differ_traverse, /* tp_traverse */
- (inquiry)Differ_clear, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
- Differ_methods, /* tp_methods */
- Differ_members, /* tp_members */
- Differ_getsetters, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- (initproc)Differ_init, /* tp_init */
- 0, /* tp_alloc */
- Differ_new, /* tp_new */
- };
- #ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
- #define PyMODINIT_FUNC void
- #endif
- PyMODINIT_FUNC initcdiff(void)
- {
- PyObject* m;
- if (PyType_Ready(&DIFFER_Type) < 0)
- return;
- //Import statements
- PyImport_ImportModule("operator");
- m = Py_InitModule("cdiff", module_methods);
- if (m == NULL)
- {
- return;
- }
- Py_INCREF(&DIFFER_Type);
- PyModule_AddObject(m, "Differ", (PyObject*)&DIFFER_Type);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement