implement diff of a sequence (for speed and c++ play).
This commit is contained in:
parent
5427a4defa
commit
e596d00971
@ -1,5 +1,6 @@
|
|||||||
#define PY_SSIZE_T_CLEAN
|
#define PY_SSIZE_T_CLEAN
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
|
#include <vector>
|
||||||
#include "structmember.h"
|
#include "structmember.h"
|
||||||
#undef HAVE_CONFIG_H // this disables the php allocator
|
#undef HAVE_CONFIG_H // this disables the php allocator
|
||||||
#include "../mediawiki-php-wikidiff2/src/lib/Wikidiff2.h"
|
#include "../mediawiki-php-wikidiff2/src/lib/Wikidiff2.h"
|
||||||
@ -90,6 +91,79 @@ static PyObject *pywikidiff2_finalSplitThreshold(pywikidiff2Obj *self, PyObject
|
|||||||
return PyFloat_FromDouble(self->config.finalSplitThreshold);
|
return PyFloat_FromDouble(self->config.finalSplitThreshold);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Build a Wikidiff2::String from a NUL-terminated C string.
//
// cstr: NUL-terminated byte string, or NULL.
// Returns a copy of the bytes up to (not including) the terminator. A NULL
// pointer yields an empty string rather than reaching strlen(NULL), which is
// undefined behaviour.
static Wikidiff2::String char_to_string(const char* cstr){
    if (cstr == NULL) {
        return Wikidiff2::String();
    }
    return Wikidiff2::String(cstr, strlen(cstr));
}
|
||||||
|
|
||||||
|
// Diff text1String against text2String and return the formatter output.
//
// A fresh Wikidiff2 engine is constructed from self->config for every call,
// an InlineJSONFormatter is attached to it, the diff is executed, and the
// text accumulated by the formatter is returned as a Wikidiff2::String.
static Wikidiff2::String wikidiff2_inline_json_diff(pywikidiff2Obj *self, Wikidiff2::String text1String, Wikidiff2::String text2String){
    InlineJSONFormatter jsonFormatter;

    Wikidiff2 differ(self->config);
    differ.addFormatter(jsonFormatter);
    differ.execute(text1String, text2String);

    return jsonFormatter.getResult().str();
}
|
||||||
|
|
||||||
|
// inline_json_diff_sequence(texts, *, numContextLines=0) -> list of str
//
// Diffs each text in `texts` against the one before it and returns the
// results as a new list of the same length. Element 0 is the diff from the
// empty string to texts[0]; element k is the diff from texts[k-1] to texts[k].
//
// texts:            a Python list whose elements are all str.
// numContextLines:  optional, keyword-only. When non-zero it is written to
//                   self->config.numContextLines before diffing and stays in
//                   effect on this object after the call. Zero means "leave
//                   the current setting unchanged".
//
// Returns a new reference to the result list, or NULL with a Python exception
// set: ValueError for unparsable arguments, TypeError when `texts` is not a
// list or holds a non-str element, RuntimeError when the diff code throws a
// C++ exception.
static PyObject *pywikidiff2_inline_json_diff_sequence(pywikidiff2Obj *self, PyObject *args, PyObject *kwargs){
    static const char *kwdlist[] = {"texts", "numContextLines", NULL};
    PyObject *py_list_obj = NULL;
    // The "i" format unit stores a C int, so the destination must be an int
    // (a long destination would only be partially written on LP64 targets).
    int numContextLines = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|$i",
                                     const_cast<char **>(kwdlist),
                                     &py_list_obj,
                                     &numContextLines)) {
        // Kept as ValueError so existing callers that catch it keep working.
        PyErr_SetString(PyExc_ValueError, "Error in arguments to inline_json_diff_sequence");
        return NULL;
    }
    if (!PyList_Check(py_list_obj)) {
        PyErr_SetString(PyExc_TypeError, "Expected a list argument.");
        return NULL;
    }

    const Py_ssize_t list_size = PyList_GET_SIZE(py_list_obj);

    if (numContextLines != 0) {
        self->config.numContextLines = numContextLines;
    }

    PyObject *result_list = NULL;
    try {
        // First pass: validate every element and record its UTF-8 buffer and
        // byte length, so a bad element is reported before any diff work.
        // The buffers are owned by the str objects, which the list keeps alive.
        std::vector<const char *> text_ptrs((size_t) list_size);
        std::vector<Py_ssize_t> text_lens((size_t) list_size);
        for (Py_ssize_t i = 0; i < list_size; ++i) {
            PyObject *py_text = PyList_GET_ITEM(py_list_obj, i); // Borrowed reference
            if (!PyUnicode_Check(py_text)) {
                PyErr_Format(PyExc_TypeError, "List element at index %zd is not a string.", i);
                return NULL;
            }
            Py_ssize_t text_size = 0;
            const char *c_text = PyUnicode_AsUTF8AndSize(py_text, &text_size);
            if (c_text == NULL) {
                return NULL;
            }
            text_ptrs[(size_t) i] = c_text;
            text_lens[(size_t) i] = text_size;
        }

        result_list = PyList_New(list_size);
        if (result_list == NULL) {
            return NULL;
        }

        // Second pass: diff consecutive texts. The first text is diffed
        // against the empty string.
        Wikidiff2::String previous;
        for (Py_ssize_t i = 0; i < list_size; ++i) {
            // Use the explicit length so texts containing NUL bytes are not
            // truncated and no strlen() pass is needed.
            Wikidiff2::String current(text_ptrs[(size_t) i], (size_t) text_lens[(size_t) i]);
            Wikidiff2::String diff_str = wikidiff2_inline_json_diff(self, previous, current);

            // "replace" matches how PyUnicode_FromFormat("%s", ...) decodes.
            PyObject *py_diff = PyUnicode_DecodeUTF8(diff_str.data(),
                                                     (Py_ssize_t) diff_str.size(),
                                                     "replace");
            if (py_diff == NULL) {
                Py_DECREF(result_list);
                return NULL;
            }
            // Steals the reference to py_diff; the slot of a new list is empty.
            PyList_SET_ITEM(result_list, i, py_diff);

            // Reuse this text as the "old" side of the next diff without
            // converting it a second time.
            previous.swap(current);
        }
    } catch (...) {
        // A C++ exception must never unwind into the CPython interpreter.
        Py_XDECREF(result_list);
        PyErr_SetString(PyExc_RuntimeError, "C++ exception raised in inline_json_diff_sequence");
        return NULL;
    }

    return result_list;
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject *pywikidiff2_inline_json_diff(pywikidiff2Obj *self, PyObject *args, PyObject *kwargs){
|
static PyObject *pywikidiff2_inline_json_diff(pywikidiff2Obj *self, PyObject *args, PyObject *kwargs){
|
||||||
static char* kwdlist[] = {"text1", "text2", "numContextLines", NULL};
|
static char* kwdlist[] = {"text1", "text2", "numContextLines", NULL};
|
||||||
char* text1;
|
char* text1;
|
||||||
@ -105,24 +179,15 @@ static PyObject *pywikidiff2_inline_json_diff(pywikidiff2Obj *self, PyObject *ar
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if(numContextLines != NULL){
|
if(numContextLines != NULL){
|
||||||
*(&self->config.numContextLines) = numContextLines;
|
self->config.numContextLines = numContextLines;
|
||||||
}
|
}
|
||||||
Wikidiff2 wikidiff2( *(&self->config));
|
Wikidiff2::String ret = wikidiff2_inline_json_diff(self, text1, text2);
|
||||||
size_t text1_len = strlen(text1);
|
|
||||||
size_t text2_len = strlen(text2);
|
|
||||||
|
|
||||||
Wikidiff2::String text1String(text1, text1_len);
|
|
||||||
Wikidiff2::String text2String(text2, text2_len);
|
|
||||||
|
|
||||||
InlineJSONFormatter formatter;
|
|
||||||
wikidiff2.addFormatter(formatter);
|
|
||||||
wikidiff2.execute(text1String, text2String);
|
|
||||||
Wikidiff2::String ret = formatter.getResult().str();
|
|
||||||
return PyUnicode_FromFormat("%s",ret.c_str());
|
return PyUnicode_FromFormat("%s",ret.c_str());
|
||||||
};
|
};
|
||||||
|
|
||||||
static PyMethodDef pywikidiff2_methods[] = {
|
static PyMethodDef pywikidiff2_methods[] = {
|
||||||
{"inline_json_diff", (PyCFunction) pywikidiff2_inline_json_diff, METH_VARARGS | METH_KEYWORDS, "run wikidiff 2 on text1 and text2"},
|
{"inline_json_diff", (PyCFunction) pywikidiff2_inline_json_diff, METH_VARARGS | METH_KEYWORDS, "run wikidiff 2 on text1 and text2"},
|
||||||
|
{"inline_json_diff_sequence", (PyCFunction) pywikidiff2_inline_json_diff_sequence, METH_VARARGS | METH_KEYWORDS, "run wikidiff 2 on a series of texts"},
|
||||||
{"num_context_lines", (PyCFunction) pywikidiff2_numContextLines, METH_NOARGS, "number of equal lines to output in the context of a diff"},
|
{"num_context_lines", (PyCFunction) pywikidiff2_numContextLines, METH_NOARGS, "number of equal lines to output in the context of a diff"},
|
||||||
{"moved_line_threshold", (PyCFunction) pywikidiff2_movedLineThreshold, METH_NOARGS, "The minimum similarity a pair of lines must have to be detected as a moved line. If present, this overrides php.ini wikidiff2.moved_line_threshold"},
|
{"moved_line_threshold", (PyCFunction) pywikidiff2_movedLineThreshold, METH_NOARGS, "The minimum similarity a pair of lines must have to be detected as a moved line. If present, this overrides php.ini wikidiff2.moved_line_threshold"},
|
||||||
{"change_threshold", (PyCFunction) pywikidiff2_changeThreshold, METH_NOARGS, "Changed lines with a similarity value below this threshold will be split into a deleted line and added line. This helps matching up moved lines in some cases."},
|
{"change_threshold", (PyCFunction) pywikidiff2_changeThreshold, METH_NOARGS, "Changed lines with a similarity value below this threshold will be split into a deleted line and added line. This helps matching up moved lines in some cases."},
|
||||||
|
@ -2,41 +2,32 @@ import pywikidiff2
|
|||||||
import json
|
import json
|
||||||
num_context_lines = 100000
|
num_context_lines = 100000
|
||||||
obj = pywikidiff2.pywikidiff2()
|
obj = pywikidiff2.pywikidiff2()
|
||||||
|
|
||||||
obj = pywikidiff2.pywikidiff2(numContextLines=num_context_lines,
|
obj = pywikidiff2.pywikidiff2(numContextLines=num_context_lines,
|
||||||
moved_paragraph_detection_cutoff=20000)
|
moved_paragraph_detection_cutoff=2000000)
|
||||||
assert obj.num_context_lines() == num_context_lines
|
# res = obj.inline_json_diff("help! \n I'm alive", "help! \n dead I am")
|
||||||
print(obj.moved_line_threshold())
|
# print(res)
|
||||||
print(obj.change_threshold())
|
|
||||||
print(obj.moved_paragraph_detection_cutoff())
|
|
||||||
print(obj.max_split_size())
|
|
||||||
print(obj.initial_split_threshold())
|
|
||||||
print(obj.final_split_threshold())
|
|
||||||
|
|
||||||
res = obj.inline_json_diff("help! \n I'm alive", "help! \n dead I am")
|
res = obj.inline_json_diff_sequence([open("test/1295229484",'r').read(),
|
||||||
|
open("test/1295229484_parmove_and_change",'r').read()],
|
||||||
|
numContextLines=num_context_lines)
|
||||||
print(res)
|
print(res)
|
||||||
print(obj.num_context_lines())
|
print("Successfully completed diff sequence!")
|
||||||
res = obj.inline_json_diff(open("test/err_last_text",'r').read(),
|
assert json.loads(res[0])['diff'][0]['type'] == 1
|
||||||
open("test/err_text",'r').read(),
|
print("Sequence diff passes assert 0")
|
||||||
numContextLines=10000)
|
print(res[1])
|
||||||
|
assert json.loads(res[1])['diff'][0]['type'] == 0
|
||||||
assert obj.num_context_lines() == 10000
|
print("Sequence diff passes assert 1")
|
||||||
|
assert obj.num_context_lines() == num_context_lines
|
||||||
|
print("Sequence diff passes assert 3")
|
||||||
|
|
||||||
res = obj.inline_json_diff("",
|
res = obj.inline_json_diff("",
|
||||||
open("test/1295229484",'r').read(),
|
open("test/1295229484",'r').read(),
|
||||||
numContextLines=1000)
|
numContextLines=1000)
|
||||||
assert obj.num_context_lines() == 1000
|
assert obj.num_context_lines() == 1000
|
||||||
|
|
||||||
res = obj.inline_json_diff(open("test/1285792388",'r').read(),
|
|
||||||
open("test/1295229484",'r').read(),
|
|
||||||
numContextLines=1000)
|
|
||||||
|
|
||||||
res = obj.inline_json_diff("",
|
|
||||||
open("test/1295229484_parmove_and_change",'r').read(),
|
|
||||||
numContextLines=1000)
|
|
||||||
print('\n')
|
|
||||||
print(res)
|
|
||||||
res = obj.inline_json_diff(open("test/1295229484",'r').read(),
|
res = obj.inline_json_diff(open("test/1295229484",'r').read(),
|
||||||
open("test/1295229484_parmove_and_change",'r').read())
|
open("test/1295229484_parmove_and_change",'r').read())
|
||||||
res = json.loads(res)
|
res = json.loads(res)
|
||||||
assert res["diff"][0]["type"] == 0
|
assert res["diff"][0]["type"] == 0
|
||||||
|
print("Success!")
|
||||||
|
Loading…
Reference in New Issue
Block a user